diff --git a/src/message.c b/src/message.c index 5c75583..1b281fa 100644 --- a/src/message.c +++ b/src/message.c @@ -20,7 +20,7 @@ int store_index_data(struct session_data *sdata, struct parser_state *state, struct data *data, uint64 id, struct config *cfg){ int rc=ERR; - char *subj; + char *subj, *sender=state->b_from, *sender_domain=state->b_from_domain; struct sql sql; if(data->folder == 0){ @@ -34,18 +34,24 @@ if(prepare_sql_statement(sdata, &sql, SQL_PREPARED_STMT_INSERT_INTO_SPHINX_TABLE) == ERR) return rc; + fix_email_address_for_sphinx(state->b_from); fix_email_address_for_sphinx(state->b_sender); fix_email_address_for_sphinx(state->b_to); + fix_email_address_for_sphinx(state->b_from_domain); fix_email_address_for_sphinx(state->b_sender_domain); fix_email_address_for_sphinx(state->b_to_domain); + if(state->b_sender_domain[0] != '\0'){ /* b_sender_domain is a buffer that is never NULL; it is empty when no usable Sender: address was parsed, so test its first byte to keep the From: defaults in that case */ + sender = state->b_sender; + sender_domain = state->b_sender_domain; + } p_bind_init(&sql); sql.sql[sql.pos] = (char *)&id; sql.type[sql.pos] = TYPE_LONGLONG; sql.pos++; - sql.sql[sql.pos] = state->b_sender; sql.type[sql.pos] = TYPE_STRING; sql.pos++; + sql.sql[sql.pos] = sender; sql.type[sql.pos] = TYPE_STRING; sql.pos++; sql.sql[sql.pos] = state->b_to; sql.type[sql.pos] = TYPE_STRING; sql.pos++; - sql.sql[sql.pos] = state->b_sender_domain; sql.type[sql.pos] = TYPE_STRING; sql.pos++; + sql.sql[sql.pos] = sender_domain; sql.type[sql.pos] = TYPE_STRING; sql.pos++; sql.sql[sql.pos] = state->b_to_domain; sql.type[sql.pos] = TYPE_STRING; sql.pos++; sql.sql[sql.pos] = subj; sql.type[sql.pos] = TYPE_STRING; sql.pos++; sql.sql[sql.pos] = state->b_body; sql.type[sql.pos] = TYPE_STRING; sql.pos++; @@ -180,15 +186,25 @@ int store_meta_data(struct session_data *sdata, struct parser_state *state, struct data *data, struct config *cfg){ - int rc=ERR, result; - char *subj, *p, s[MAXBUFSIZE], s2[SMALLBUFSIZE], vcode[2*DIGEST_LENGTH+1], ref[2*DIGEST_LENGTH+1]; + int rc=ERR; + char *subj, *sender, *sender_domain, s[MAXBUFSIZE], s2[SMALLBUFSIZE], 
vcode[2*DIGEST_LENGTH+1], ref[2*DIGEST_LENGTH+1]; uint64 id=0; struct sql sql; subj = state->b_subject; if(*subj == ' ') subj++; - snprintf(s, sizeof(s)-1, "%llu+%s%s%s%ld%ld%ld%d%d%d%d%s%s%s", id, subj, state->b_sender, state->message_id, sdata->now, sdata->sent, sdata->retained, sdata->tot_len, sdata->hdr_len, sdata->direction, state->n_attachments, sdata->ttmpfile, sdata->digest, sdata->bodydigest); + if(state->b_sender_domain[0] != '\0'){ /* b_sender_domain is a buffer that is never NULL; test its first byte so the From: fallback below is reachable when no usable Sender: address was parsed */ + sender = state->b_sender; + sender_domain = state->b_sender_domain; + get_first_email_address_from_string(state->b_sender, s2, sizeof(s2)); + } else { + sender = state->b_from; + sender_domain = state->b_from_domain; + get_first_email_address_from_string(state->b_from, s2, sizeof(s2)); + } + + snprintf(s, sizeof(s)-1, "%llu+%s%s%s%ld%ld%ld%d%d%d%d%s%s%s", id, subj, sender, state->message_id, sdata->now, sdata->sent, sdata->retained, sdata->tot_len, sdata->hdr_len, sdata->direction, state->n_attachments, sdata->ttmpfile, sdata->digest, sdata->bodydigest); digest_string(s, &vcode[0]); @@ -201,19 +217,6 @@ if(prepare_sql_statement(sdata, &sql, SQL_PREPARED_STMT_INSERT_INTO_META_TABLE) == ERR) return ERR; - memset(s2, 0, sizeof(s2)); - - p = state->b_sender; - do { - memset(s2, 0, sizeof(s2)); - p = split(p, ' ', s2, sizeof(s2)-1, &result); - - if(s2[0] == '\0') continue; - - if(does_it_seem_like_an_email_address(s2) == 1){ break; } - } while(p); - - if(strlen(state->b_to) < 5){ snprintf(state->b_to, SMALLBUFSIZE-1, "undisclosed-recipients@no.domain"); } @@ -222,7 +225,7 @@ p_bind_init(&sql); sql.sql[sql.pos] = &s2[0]; sql.type[sql.pos] = TYPE_STRING; sql.pos++; - sql.sql[sql.pos] = state->b_sender_domain; sql.type[sql.pos] = TYPE_STRING; sql.pos++; + sql.sql[sql.pos] = sender_domain; sql.type[sql.pos] = TYPE_STRING; sql.pos++; sql.sql[sql.pos] = subj; sql.type[sql.pos] = TYPE_STRING; sql.pos++; sql.sql[sql.pos] = (char *)&sdata->spam_message; sql.type[sql.pos] = TYPE_LONG; sql.pos++; sql.sql[sql.pos] = (char *)&sdata->now; 
sql.type[sql.pos] = TYPE_LONG; sql.pos++; diff --git a/src/parser.c b/src/parser.c index bcbba16..cd9224b 100644 --- a/src/parser.c +++ b/src/parser.c @@ -80,22 +80,18 @@ clearhash(state->rcpt_domain); clearhash(state->journal_recipient); - // Fix From: line if it's too long + // Fix From: and Sender: lines if they are too long if(strlen(state->b_from) > 255) state->b_from[255] = '\0'; if(strlen(state->b_from_domain) > 255) state->b_from_domain[255] = '\0'; if(strlen(state->b_sender) > 255) state->b_sender[255] = '\0'; if(strlen(state->b_sender_domain) > 255) state->b_sender_domain[255] = '\0'; - // If Sender: header doesn't exist, then copy the From: header value to it - // Otherwise append the From: address to the recipients list + // TODO: If both Sender: and From: headers exist, and they are different, then + // append the From: address to recipients list to give him access to this email + // as well - if(state->b_sender[0] == '\0'){ - strcpy(state->b_sender, state->b_from); - strcpy(state->b_sender_domain, state->b_from_domain); - } else { - add_recipient(state->b_from, strlen(state->b_from), sdata, state, data, cfg); - } + // Truncate the message_id if it's >255 characters if(strlen(state->message_id) > 255) state->message_id[255] = '\0'; diff --git a/src/parser.h b/src/parser.h index ca81e3b..c18bede 100644 --- a/src/parser.h +++ b/src/parser.h @@ -38,5 +38,6 @@ void tokenize(char *buf, struct parser_state *state, struct session_data *sdata, struct data *data, struct config *cfg); void flush_attachment_buffer(struct parser_state *state, char *abuffer, unsigned int abuffersize); void fill_attachment_name_buf(struct parser_state *state, char *buf); +int get_first_email_address_from_string(char *str, char *buf, int buflen); #endif /* _PARSER_H */ diff --git a/src/parser_utils.c b/src/parser_utils.c index 6cd2bbf..abc63ad 100644 --- a/src/parser_utils.c +++ b/src/parser_utils.c @@ -624,9 +624,9 @@ for(; *p; p++){ - if( (state->message_state == MSG_RECEIVED || 
state->message_state == MSG_FROM || state->message_state == MSG_TO || state->message_state == MSG_CC || state->message_state == MSG_RECIPIENT) && *p == '@'){ continue; } + if( (state->message_state == MSG_RECEIVED || state->message_state == MSG_FROM || state->message_state == MSG_SENDER || state->message_state == MSG_TO || state->message_state == MSG_CC || state->message_state == MSG_RECIPIENT) && *p == '@'){ continue; } - if(state->message_state == MSG_FROM || state->message_state == MSG_TO || state->message_state == MSG_CC || state->message_state == MSG_RECIPIENT){ + if(state->message_state == MSG_FROM || state->message_state == MSG_SENDER || state->message_state == MSG_TO || state->message_state == MSG_CC || state->message_state == MSG_RECIPIENT){ /* To fix some unusual addresses, eg. * "'user@domain'" -> user@domain @@ -1077,3 +1077,20 @@ state->anamepos++; } } + + +int get_first_email_address_from_string(char *str, char *buf, int buflen){ + int result; + + char *p = str; + do { + memset(buf, 0, buflen); + p = split(p, ' ', buf, buflen-1, &result); + + if(*buf == '\0') continue; + + if(does_it_seem_like_an_email_address(buf) == 1){ return 1; } + } while(p); + + return 0; +} diff --git a/src/tokenizer.c b/src/tokenizer.c index ebadbcf..f6b3251 100644 --- a/src/tokenizer.c +++ b/src/tokenizer.c @@ -82,9 +82,16 @@ if(q) fix_plus_sign_in_email_address(puf, &q, &len); memcpy(&(state->b_sender[strlen(state->b_sender)]), puf, len); - if(strlen(state->b_sender) < SMALLBUFSIZE-len-1){ - split_email_address(puf); - memcpy(&(state->b_sender[strlen(state->b_sender)]), puf, len); + + if(len >= MIN_EMAIL_ADDRESS_LEN && does_it_seem_like_an_email_address(puf) == 1 && state->b_sender_domain[0] == '\0'){ + if(q && strlen(q) > 5){ + memcpy(&(state->b_sender_domain), q+1, strlen(q+1)-1); + } + + if(strlen(state->b_sender) < SMALLBUFSIZE-len-1){ + split_email_address(puf); + memcpy(&(state->b_sender[strlen(state->b_sender)]), puf, len); + } } } else if((state->message_state == 
MSG_TO || state->message_state == MSG_CC || state->message_state == MSG_RECIPIENT || state->message_state == MSG_ENVELOPE_TO) && state->is_1st_header == 1 && state->tolen < MAXBUFSIZE-len-1){ diff --git a/unit_tests/check_parser.c b/unit_tests/check_parser.c index edcb743..9039eb5 100644 --- a/unit_tests/check_parser.c +++ b/unit_tests/check_parser.c @@ -10,6 +10,8 @@ char message_id[SMALLBUFSIZE]; char from[SMALLBUFSIZE]; char from_domain[SMALLBUFSIZE]; + char sender[SMALLBUFSIZE]; + char sender_domain[SMALLBUFSIZE]; char to[SMALLBUFSIZE]; char to_domain[SMALLBUFSIZE]; char reference[SMALLBUFSIZE]; @@ -26,22 +28,22 @@ struct data data; struct parser_test tests[] = { - {"1.eml", "", "játékok birodalma játékbolt hirlevel@jatekokbirodalma.hu hirlevel jatekokbirodalma hu ", "jatekokbirodalma.hu", "architerv m sj@acts.hu sj acts hu ", "acts.hu ", "", "BLACK FRIDAY - Hihetetlen kedvezmények csak 1 napig november 27-én", 2}, - {"2.eml", "<20151101142653.111156815AF6D@acts.hu>", "jml lighting huixinsoft67@foxmail.com huixinsoft67 foxmail com ", "foxmail.com", "sj@acts.hu sj acts hu ", "acts.hu ", "", "New design ultra slim led panel light", 0}, - {"5-ibm-images.eml", "", "ibm rendezveny rendezveny@hu.ibm.com rendezveny hu ibm com ", "hu.ibm.com", "cim1@aaaa.bbb.fu cim1 aaaa bbb fu ajajaj@piler.aaa.fu ajajaj piler aaa fu ibm rendezveny rendezveny@hu.ibm.com rendezveny hu ibm com ", "aaaa.bbb.fu piler.aaa.fu hu.ibm.com ", "", "***Emlékeztető*** - Egészségipar - eEgészségügy (Út a jövőbe, párbeszéd a gazdaságélénkítésről) 2010. 
november 4.", 5}, - {"9-attached-text.eml", "", "dr lucky amechi clubzenit@zenithoteles.com clubzenit zenithoteles com ", "zenithoteles.com", "usuarios-no-listados ", "", "", "Please read my attached letter", 1}, - {"13-xlsx.eml", "", "aaaaa@aaa.fu aaaaa aaa fu ", "aaa.fu", "sj@acts.hu sj acts hu ", "acts.hu ", "", "ez egy teszt", 1}, - {"15-image-only-spam.eml", "", "kriegel paff sketches@pnmarketing.com sketches pnmarketing com ", "pnmarketing.com", "holmon knobel aaaaa@acts.hu aaaaa acts hu ", "acts.hu ", "", "Lack of concentration, backed up by a vocabulary of tremendous scope, a", 1}, - {"16-rfc822-attachment-1.eml", "", "martonagnes martonagnes@lajt.hu martonagnes lajt hu erős istván eistvan@marosheviz.info ", "lajt.hu", "martonagnes@lajt.hu martonagnes lajt hu ", "lajt.hu ", "", "Féláras akció! 31000Ft/2fő/3nap húsvétkor is a Park Inn****-ben!", 2 }, - {"17-attached-text-bogus-mime.eml", "", "dr lucky amechi clubzenit@zenithoteles.com clubzenit zenithoteles com ", "zenithoteles.com", "usuarios-no-listados ", "", "", "Please read my attached letter", 1}, - {"18-spam-html-encoding.eml", "", "a1 hitelcentrum kft Üveges szilvia a1hitelcentrum@t-online.hu a1hitelcentrum t online hu ", "t-online.hu", "postmaster@aaa.fu postmaster aaa fu ", "aaa.fu ", "", "TÁJÉKOZTATÁSVargay Péter", 0}, - {"19-pdf-attachment-bad-mime.eml", "<20100213$2b62e942$9cc2b$sxm@61-186.reverse.ukhost4u.com>", "jennifer - billing department billing@limitedsoftwareworld.com billing limitedsoftwareworld com ", "limitedsoftwareworld.com", "100000 100000@aaa.fu 100000 aaa fu ", "aaa.fu ", "", "Billing Summary for 100000, Processed on 2010-02-13 17:01:03", 1}, - {"20-pdf-attachment-bad-mime.eml", "<20100213$2b62e942$9cc2b$sxm@61-187.reverse.ukhost4u.com>", "jennifer - billing department billing@limitedsoftwareworld.com billing limitedsoftwareworld com ", "limitedsoftwareworld.com", "100000 100000@aaa.fu 100000 aaa fu ", "aaa.fu ", "", "Billing Summary for 100000, Processed on 2010-02-13 17:01:03", 
1}, - {"21-register-tricky-urls.eml", "", "the register update-49363-08f0f768@list.theregister.co.uk update 49363 08f0f768 list theregister co uk ", "list.theregister.co.uk", "hello@mail.aaa.fu hello mail aaa fu ", "mail.aaa.fu ", "", "[sp@m] Reg Headlines Friday July 20", 0}, - {"30-subject.eml", "<3660278814815884@pongr-fabd8067e>", "aaapsi.hu info@aaapsi.hu info aaapsi hu ", "aaapsi.hu", "hello@acts.hu hello acts hu ", "acts.hu ", "", "RE: hxx-ajajajaja.com_ Aaagágyi és kia ttt_webstat hiba", 0}, - {"31-subject.eml", "<3660278814815884@pongr-fabd8067e>", "aaapsi.hu info@aaapsi.hu info aaapsi hu ", "aaapsi.hu", "hello@acts.hu hello acts hu ", "acts.hu ", "", "Re: stanhu \"domain not found\"-dal eldobja a @fohu-ra küldött leveleket...", 0}, - {"32-subject.eml", "<3660278814815884@pongr-fabd8067e>", "aaapsi.hu info@aaapsi.hu info aaapsi hu ", "aaapsi.hu", "hello@acts.hu hello acts hu ", "acts.hu ", "", " www.ujsag.hu new virtual host reg. --> Aaaaaaaaa", 0}, - {"33-subject.eml", "<3660278814815884@pongr-fabd8067e>", "aaapsi.hu info@aaapsi.hu info aaapsi hu ", "aaapsi.hu", "hello@acts.hu hello acts hu ", "acts.hu ", "", "[JIRA] Commented: (AAAA-151) A aaa-nek kerek egy XXX-et, ZH74617282, ACC27363484944", 0}, + {"1.eml", "", "játékok birodalma játékbolt hirlevel@jatekokbirodalma.hu hirlevel jatekokbirodalma hu ", "jatekokbirodalma.hu", "", "", "architerv m sj@acts.hu sj acts hu ", "acts.hu ", "", "BLACK FRIDAY - Hihetetlen kedvezmények csak 1 napig november 27-én", 2}, + {"2.eml", "<20151101142653.111156815AF6D@acts.hu>", "jml lighting huixinsoft67@foxmail.com huixinsoft67 foxmail com ", "foxmail.com", "jml lighting hwtwi@mcqw.com hwtwi mcqw com ", "mcqw.com", "sj@acts.hu sj acts hu ", "acts.hu ", "", "New design ultra slim led panel light", 0}, + {"5-ibm-images.eml", "", "ibm rendezveny rendezveny@hu.ibm.com rendezveny hu ibm com ", "hu.ibm.com", "marta riman rimanmarta@hu.ibm.com rimanmarta hu ibm com ", "hu.ibm.com", "cim1@aaaa.bbb.fu cim1 aaaa bbb fu 
ajajaj@piler.aaa.fu ajajaj piler aaa fu ibm rendezveny rendezveny@hu.ibm.com rendezveny hu ibm com ", "aaaa.bbb.fu piler.aaa.fu hu.ibm.com ", "", "***Emlékeztető*** - Egészségipar - eEgészségügy (Út a jövőbe, párbeszéd a gazdaságélénkítésről) 2010. november 4.", 5}, + {"9-attached-text.eml", "", "dr lucky amechi clubzenit@zenithoteles.com clubzenit zenithoteles com ", "zenithoteles.com", "aaa aaa aaa@aaa.fu aaa aaa fu ", "aaa.fu", "usuarios-no-listados ", "", "", "Please read my attached letter", 1}, + {"13-xlsx.eml", "", "aaaaa@aaa.fu aaaaa aaa fu ", "aaa.fu", "", "", "sj@acts.hu sj acts hu ", "acts.hu ", "", "ez egy teszt", 1}, + {"15-image-only-spam.eml", "", "kriegel paff sketches@pnmarketing.com sketches pnmarketing com ", "pnmarketing.com", "", "", "holmon knobel aaaaa@acts.hu aaaaa acts hu ", "acts.hu ", "", "Lack of concentration, backed up by a vocabulary of tremendous scope, a", 1}, + {"16-rfc822-attachment-1.eml", "", "martonagnes martonagnes@lajt.hu martonagnes lajt hu erős istván eistvan@marosheviz.info ", "lajt.hu", "postmaster postmaster@aaa.fu postmaster aaa fu ", "aaa.fu", "martonagnes@lajt.hu martonagnes lajt hu ", "lajt.hu ", "", "Féláras akció! 
31000Ft/2fő/3nap húsvétkor is a Park Inn****-ben!", 2 }, + {"17-attached-text-bogus-mime.eml", "", "dr lucky amechi clubzenit@zenithoteles.com clubzenit zenithoteles com ", "zenithoteles.com", "postmaster postmaster@aaa.fu postmaster aaa fu ", "aaa.fu", "usuarios-no-listados ", "", "", "Please read my attached letter", 1}, + {"18-spam-html-encoding.eml", "", "a1 hitelcentrum kft Üveges szilvia a1hitelcentrum@t-online.hu a1hitelcentrum t online hu ", "t-online.hu", "postmaster postmaster@aaa.fu postmaster aaa fu ", "aaa.fu", "postmaster@aaa.fu postmaster aaa fu ", "aaa.fu ", "", "TÁJÉKOZTATÁSVargay Péter", 0}, + {"19-pdf-attachment-bad-mime.eml", "<20100213$2b62e942$9cc2b$sxm@61-186.reverse.ukhost4u.com>", "jennifer - billing department billing@limitedsoftwareworld.com billing limitedsoftwareworld com ", "limitedsoftwareworld.com", "", "", "100000 100000@aaa.fu 100000 aaa fu ", "aaa.fu ", "", "Billing Summary for 100000, Processed on 2010-02-13 17:01:03", 1}, + {"20-pdf-attachment-bad-mime.eml", "<20100213$2b62e942$9cc2b$sxm@61-187.reverse.ukhost4u.com>", "jennifer - billing department billing@limitedsoftwareworld.com billing limitedsoftwareworld com ", "limitedsoftwareworld.com", "", "", "100000 100000@aaa.fu 100000 aaa fu ", "aaa.fu ", "", "Billing Summary for 100000, Processed on 2010-02-13 17:01:03", 1}, + {"21-register-tricky-urls.eml", "", "the register update-49363-08f0f768@list.theregister.co.uk update 49363 08f0f768 list theregister co uk ", "list.theregister.co.uk", "", "", "hello@mail.aaa.fu hello mail aaa fu ", "mail.aaa.fu ", "", "[sp@m] Reg Headlines Friday July 20", 0}, + {"30-subject.eml", "<3660278814815884@pongr-fabd8067e>", "aaapsi.hu info@aaapsi.hu info aaapsi hu ", "aaapsi.hu", "", "", "hello@acts.hu hello acts hu ", "acts.hu ", "", "RE: hxx-ajajajaja.com_ Aaagágyi és kia ttt_webstat hiba", 0}, + {"31-subject.eml", "<3660278814815884@pongr-fabd8067e>", "aaapsi.hu info@aaapsi.hu info aaapsi hu ", "aaapsi.hu", "", "", "hello@acts.hu hello acts hu 
", "acts.hu ", "", "Re: stanhu \"domain not found\"-dal eldobja a @fohu-ra küldött leveleket...", 0}, + {"32-subject.eml", "<3660278814815884@pongr-fabd8067e>", "aaapsi.hu info@aaapsi.hu info aaapsi hu ", "aaapsi.hu", "", "", "hello@acts.hu hello acts hu ", "acts.hu ", "", " www.ujsag.hu new virtual host reg. --> Aaaaaaaaa", 0}, + {"33-subject.eml", "<3660278814815884@pongr-fabd8067e>", "aaapsi.hu info@aaapsi.hu info aaapsi hu ", "aaapsi.hu", "", "", "hello@acts.hu hello acts hu ", "acts.hu ", "", "[JIRA] Commented: (AAAA-151) A aaa-nek kerek egy XXX-et, ZH74617282, ACC27363484944", 0}, }; @@ -66,7 +68,9 @@ assert(strcmp(state.message_id, tests[i].message_id) == 0 && "test_parser()1"); assert(strcmp(state.b_from, tests[i].from) == 0 && "test_parser()2a"); - assert(strcmp(state.b_from_domain, tests[i].from_domain) == 0 && "test_parser()2b"); + assert(strcmp(state.b_sender, tests[i].sender) == 0 && "test_parser()2b"); + assert(strcmp(state.b_from_domain, tests[i].from_domain) == 0 && "test_parser()2c"); + assert(strcmp(state.b_sender_domain, tests[i].sender_domain) == 0 && "test_parser()2d"); assert(strcmp(state.b_to, tests[i].to) == 0 && "test_parser()3a"); assert(strcmp(state.b_to_domain, tests[i].to_domain) == 0 && "test_parser()3b"); assert(strcmp(state.b_subject, tests[i].subject) == 0 && "test_parser()4");