diff --git a/etc/sphinx.conf b/etc/sphinx.conf index 93f7f8e..7962be8 100644 --- a/etc/sphinx.conf +++ b/etc/sphinx.conf @@ -41,6 +41,19 @@ } +source tag +{ + type = mysql + sql_host = localhost + sql_db = sphinx + sql_user = sphinx + sql_pass = sphinx + + sql_query_pre = SET NAMES utf8 + sql_query = SELECT id, `tag` FROM `tag` + +} + index main1 { @@ -60,6 +73,14 @@ } +index tag1 +{ + source = tag + path = /var/data/tag1 + docinfo = extern + charset_type = utf-8 +} + indexer { diff --git a/src/message.c b/src/message.c index 9e7fb9f..6bf0714 100644 --- a/src/message.c +++ b/src/message.c @@ -140,6 +140,9 @@ } + fix_email_address_for_sphinx(state->b_from); + fix_email_address_for_sphinx(state->b_to); + memset(bind, 0, sizeof(bind)); diff --git a/src/parser.c b/src/parser.c index e17abab..e041267 100644 --- a/src/parser.c +++ b/src/parser.c @@ -51,8 +51,6 @@ trimBuffer(state.b_subject); fixupEncodedHeaderLine(state.b_subject); - state.message_state = MSG_SUBJECT; - translateLine((unsigned char*)&state.b_subject, &state); for(i=1; i<=state.n_attachments; i++){ diff --git a/src/parser.h b/src/parser.h index e4ffd47..156d7fc 100644 --- a/src/parser.h +++ b/src/parser.h @@ -22,6 +22,7 @@ void markHTML(char *buf, struct _state *state); int appendHTMLTag(char *buf, char *htmlbuf, int pos, struct _state *state); void translateLine(unsigned char *p, struct _state *state); +void fix_email_address_for_sphinx(char *s); void reassembleToken(char *p); void degenerateToken(unsigned char *p); void fixURL(char *url); diff --git a/src/parser_utils.c b/src/parser_utils.c index 14ba5ae..fd6df17 100644 --- a/src/parser_utils.c +++ b/src/parser_utils.c @@ -203,15 +203,11 @@ memset(puf, 0, sizeof(puf)); - //printf("hdr: *%s*\n", buf); - q = buf; do { q = split_str(q, " ", v, sizeof(v)-1); - //printf("v: %s\n", v); - p = v; do { @@ -219,7 +215,6 @@ if(start){ *start = '\0'; if(strlen(p) > 0){ - //printf("flushed, no decode: *%s*\n", p); strncat(puf, p, sizeof(puf)-1); } @@ -233,31 +228,24 @@ end = strstr(s+3, "?="); if(end){ *end = '\0'; - //printf("ez az: *%s*\n", s+3); + if(sb){ decodeBase64(s+3); } if(sq){ decodeQP(s+3); r = s + 3; for(; *r; r++){ if(*r == '_') *r = ' '; } } - - //printf("dekodolva: *%s*\n", s+3); - - //printf("start: %s\n", start+1); if(strncasecmp(start+1, "utf-8", 5) == 0) decodeUTF8(s+3); strncat(puf, s+3, sizeof(puf)-1); p = end + 2; - //printf("maradek: +%s+\n", p); } } else { - //printf("aaaa: *%s*\n", start); strncat(puf, start, sizeof(puf)-1); break; } } else { - //printf("keiene dekod: +%s+\n", p); strncat(puf, p, sizeof(puf)-1); break; } @@ -268,8 +256,6 @@ } while(q); - //printf("=> *%s*\n", puf); - snprintf(buf, MAXBUFSIZE-1, "%s", puf); } @@ -464,6 +450,13 @@ } +void fix_email_address_for_sphinx(char *s){ + for(; *s; s++){ + if(*s == '@' || *s == '.' || *s == '+') *s = 'X'; + } +} + + /* * reassemble 'V i a g r a' to 'Viagra' */ diff --git a/util/db-mysql.sql b/util/db-mysql.sql index a5a7a53..38156d7 100644 --- a/util/db-mysql.sql +++ b/util/db-mysql.sql @@ -1,12 +1,12 @@ drop table if exists `sph_counter`; -create table `sph_counter` ( +create table if not exists `sph_counter` ( `counter_id` int not null, `max_doc_id` int not null, primary key (`counter_id`) ); drop table if exists `sph_index`; -create table `sph_index` ( +create table if not exists `sph_index` ( `id` bigint not null, `from` char(255) default null, `to` text(512) default null, @@ -21,7 +21,7 @@ drop table if exists `metadata`; -create table `metadata` ( +create table if not exists `metadata` ( `id` bigint unsigned not null auto_increment, `from` char(255) not null, `subject` text(512) default null, @@ -44,7 +44,7 @@ drop table if exists `rcpt`; -create table `rcpt` ( +create table if not exists `rcpt` ( `id` bigint unsigned not null, `to` char(64) not null, unique(`id`,`to`) @@ -54,8 +54,11 @@ create index `rcpt_idx2` on `rcpt`(`to`); +drop view if exists `messages`; +create view `messages` AS select `metadata`.`id` AS `id`,`metadata`.`piler_id` AS `piler_id`,`metadata`.`from` AS `from`,`rcpt`.`to` AS `to`,`metadata`.`subject` AS `subject` from (`metadata` join `rcpt`) where (`metadata`.`id` = `rcpt`.`id`); + drop table if exists `attachment`; -create table `attachment` ( +create table if not exists `attachment` ( `id` bigint unsigned not null auto_increment, `piler_id` char(36) not null, `attachment_id` int not null, @@ -71,8 +74,15 @@ create index `attachment_idx2` on `attachment`(`sig`); +drop table if exists `tag`; +create table if not exists `tag` ( + `id` bigint not null unique, + `tag` char(255) default null +); + + drop table if exists `archiving_rule`; -create table `archiving_rule` ( +create table if not exists `archiving_rule` ( `id` bigint unsigned not null auto_increment, `from` char(128) default null, `to` char(255) default null, @@ -95,7 +105,7 @@ insert into `counter` values(0, 0, 0); drop table if exists `search`; -create table `search` ( +create table if not exists `search` ( `email` char(128) not null, `ts` int default 0, `term` text(512) not null