diff --git a/src/decoder.c b/src/decoder.c index 5317808..d46e585 100644 --- a/src/decoder.c +++ b/src/decoder.c @@ -206,9 +206,13 @@ i += 2; } + else if(p[i] == '_'){ + c = ' '; + } p[k] = c; k++; + } p[k] = '\0'; diff --git a/src/parser_utils.c b/src/parser_utils.c index 377b751..1a41b72 100644 --- a/src/parser_utils.c +++ b/src/parser_utils.c @@ -320,13 +320,12 @@ * but then I saw a 6-7000 byte long subject line, so I've switched to MAXBUFSIZE */ char v[MAXBUFSIZE], u[MAXBUFSIZE], puf[MAXBUFSIZE], encoding[MAXBUFSIZE], tmpbuf[2*MAXBUFSIZE]; - int need_encoding, ret; + int need_encoding, ret, prev_encoded=0, n_tokens=0; + int b64=0, qp=0; if(buflen < 5) return; memset(puf, 0, sizeof(puf)); - memset(encoding, 0, sizeof(encoding)); - q = buf; @@ -342,18 +341,46 @@ * We can't use split_str(p, "=?", ...) it will fail with the following pattern * =?UTF-8?B?SG9neWFuIMOtcmp1bmsgcGFuYXN6bGV2ZWxldD8=?= * - * Also the below patter requires special care: + * Also the below pattern requires special care: * =?gb2312?B??==?gb2312?Q??= + * + * And we have to check the following cases as well: + * Happy New Year! =?utf-8?q?=F0=9F=8E=86?= */ + b64 = qp = 0; + memset(encoding, 0, sizeof(encoding)); + r = strstr(p, "=?"); if(r){ p = r + 2; + + e = strchr(p, '?'); + if(e){ + *e = '\0'; + snprintf(encoding, sizeof(encoding)-1, "%s", p); + *e = '?'; + + s = strcasestr(e, "?B?"); + if(s){ + b64 = 1; + p = s + 3; + } + else { + s = strcasestr(e, "?Q?"); + if(s){ + qp = 1; + p = s + 3; + } + } + } + end = strstr(p, "?="); if(end){ *end = '\0'; } + snprintf(u, sizeof(u)-1, "%s", p); if(end) { @@ -367,47 +394,38 @@ if(u[0] == 0) continue; - memset(encoding, 0, sizeof(encoding)); + n_tokens++; - // Check if it's either ?B? or ?Q? encoding ... - s = strcasestr(u, "?B?"); - if(s){ - decodeBase64(s+3); + if(b64 == 1) decodeBase64(u); + else if(qp == 1) decodeQP(u); + + + /* + * https://www.ietf.org/rfc/rfc2047.txt says that + * + * "When displaying a particular header field that contains multiple + * 'encoded-word's, any 'linear-white-space' that separates a pair of + * adjacent 'encoded-word's is ignored." (6.2) + */ + if(prev_encoded == 1 && (b64 == 1 || qp == 1)) {} + else if(n_tokens > 1){ + strncat(puf, " ", sizeof(puf)-strlen(puf)-1); } - else { - s = strcasestr(u, "?Q?"); - if(s){ - decodeQP(s+3); - r = s + 3; - for(; *r; r++){ - if(*r == '_') *r = ' '; - } + + if(b64 == 1 || qp == 1){ + prev_encoded = 1; + need_encoding = 0; + ret = ERR; + + if(encoding[0] && strcasecmp(encoding, "utf-8")){ + need_encoding = 1; + ret = utf8_encode(u, strlen(u), &tmpbuf[0], sizeof(tmpbuf), encoding); } - } - // ... if it is, then get the encoding - if(s){ - e = strchr(u, '?'); - if(e){ - *e = '\0'; - snprintf(encoding, sizeof(encoding)-1, "%s", u); - *e = '?'; - - need_encoding = 0; - ret = ERR; - - if(encoding[0] && strcasecmp(encoding, "utf-8")){ - need_encoding = 1; - ret = utf8_encode(s+3, strlen(s+3), &tmpbuf[0], sizeof(tmpbuf), encoding); - } - - if(need_encoding == 1 && ret == OK) - strncat(puf, tmpbuf, sizeof(puf)-strlen(puf)-1); - else - strncat(puf, s+3, sizeof(puf)-strlen(puf)-1); + if(need_encoding == 1 && ret == OK){ + strncat(puf, tmpbuf, sizeof(puf)-strlen(puf)-1); } else { - memset(encoding, 0, sizeof(encoding)); strncat(puf, u, sizeof(puf)-strlen(puf)-1); } } @@ -417,8 +435,6 @@ } while(p); - if(q && encoding[0] == 0) strncat(puf, " ", sizeof(puf)-strlen(puf)-1); - } while(q); snprintf(buf, buflen-1, "%s", puf); diff --git a/unit_tests/check_parser_utils.c b/unit_tests/check_parser_utils.c index d87a837..e20ec31 100644 --- a/unit_tests/check_parser_utils.c +++ b/unit_tests/check_parser_utils.c @@ -125,7 +125,7 @@ {"Re: [Bitbucket] Issue #627: ldap user can't login (jsuto/piler)", "Re: [Bitbucket] Issue #627: ldap user can't login (jsuto/piler)"}, {"=?iso-8859-2?Q?RE:_test.aaa.fu_z=F3na?=", "RE: test.aaa.fu zóna"}, {"=?iso-8859-2?Q?V=E1ltoz=E1s_az_IT_szervezetben_/_Personal_changes_in_the_?=", "Változás az IT szervezetben / Personal changes in the "}, - {"Re: AAAmil /29 UZ736363", "Re: AAAmil /29 UZ736363"}, + {"Re: AAAmil /29 UZ736363", "Re: AAAmil /29 UZ736363"}, {"=?UTF-8?Q?[JIRA]_Created:_(HUDSS-196)_T=C5=B1zfal_?=", "[JIRA] Created: (HUDSS-196) Tűzfal "}, {"=?iso-8859-2?Q?RE:_Baptista_Szeretetszolg=E1lat?=", "RE: Baptista Szeretetszolgálat"}, {"=?iso-8859-2?B?SXR0IGF6IE1OQiBuYWd5IGRvYuFzYTogaXNt6XQgYmVsZW55+mxuYWsgYSBoaXRlbGV66XNiZSAoMjAxNS4xMS4wMy4gLSBzakBhY3RzLmh1KQ==?=", "Itt az MNB nagy dobása: ismét belenyúlnak a hitelezésbe (2015.11.03. - sj@acts.hu)"}, @@ -139,7 +139,7 @@ {"=?UTF-8?Q?Megh=C3=ADv=C3=B3=20a=20Pulzus=20felm=C3=A9r=C3=A9sre=20/=20Inv?= =?UTF-8?Q?itation=20to=20the=20Pulse=20Survey?=", "Meghívó a Pulzus felmérésre / Invitation to the Pulse Survey"}, {"=?iso-8859-2?Q?vhost_l=E9trehoz=E1sa?=", "vhost létrehozása"}, {"Re: MAIL =?UTF-8?B?U1pPTEfDgUxUQVTDgVMgSElCQSAgIEdUUzogOTE1NDUyMQ==?=", "Re: MAIL SZOLGÁLTATÁS HIBA GTS: 9154521"}, - {"[spam???] Better Sex. Better Body. Better Life.", "[spam???] Better Sex. Better Body. Better Life."}, + {"[spam???] Better Sex. Better Body. Better Life.", "[spam???] Better Sex. Better Body. Better Life."}, {"1gy2tt. V3l4d. M5sk6nt", "1gy2tt. V3l4d. M5sk6nt"}, {"=?iso-8859-2?B?03Jp4XNpIG1lZ2xlcGV06XMsIG5pbmNzIHT2YmIgbWVudHPpZyBBbWVyaWthIHN64W3hcmEgKDIwMTUuMTEuMDYuIC0gc2pAYWN0cy5odSk=?=", "Óriási meglepetés, nincs több mentség Amerika számára (2015.11.06. - sj@acts.hu)"}, {"=?utf-8?B?Rlc6IEVtYWlsIGZvZ2Fkw6FzaSBoaWJh?=", "FW: Email fogadási hiba"}, @@ -154,6 +154,8 @@ {"Subject: =?windows-1251?B?ze7i7uPu5O3o5SDv7uTg8OroIOTr/yDC4Pjo?=", "Subject: Новогодние подарки для Ваши"}, {"Subject: =?utf-8?Q?Divatos,_=C3=BCde_sz=C3=ADneinek_k=C3=B6sz=C3=B6nhet=C5=91en_el?=", "Subject: Divatos, üde színeinek köszönhetően el"}, {"=?gb2312?B?yc/Gz76pIC0gw7/fTMir0bKy6YjzuOYgKDIwMTcxMDMwLTMxKSBHQlcgUG9k?==?gb2312?Q?ium_&_Basement.docx?=", "上葡京 - 每週全巡查報告 (20171030-31) GBW Podium & Basement.docx"}, + {"Subject: =?UTF-8?Q?=E2=98=85_JubiDu!Versandkost?= =?UTF-8?Q?enfrei-Verl=C3=A4ngerung!=E2=98=85?=", "Subject: ★ JubiDu!Versandkostenfrei-Verlängerung!★"}, + {"Happy New Year! =?utf-8?q?=F0=9F=8E=86?=", "Happy New Year! 🎆"}, }; TEST_HEADER(); diff --git a/unit_tests/run.sh b/unit_tests/run.sh index df75805..7bbe420 100755 --- a/unit_tests/run.sh +++ b/unit_tests/run.sh @@ -5,8 +5,10 @@ set -o nounset set -x -LD_LIBRARY_PATH=../src ./check_parser_utils -LD_LIBRARY_PATH=../src ./check_parser -LD_LIBRARY_PATH=../src ./check_rules -LD_LIBRARY_PATH=../src ./check_digest -LD_LIBRARY_PATH=../src ./check_mydomains +export LD_LIBRARY_PATH=../src + +./check_parser_utils +./check_parser +./check_rules +./check_digest +./check_mydomains