diff --git a/src/parser_utils.c b/src/parser_utils.c index 49ab3da..0b06ee6 100644 --- a/src/parser_utils.c +++ b/src/parser_utils.c @@ -824,21 +824,22 @@ if(p){ p++; - // If the line has the 'name' more than once, then truncate the subsequent parts, ie. - // utf-8''P;LAN%20Holden%204.docx;filename="P;LAN Holden 4.docx" ==> utf-8''P;LAN%20Holden%204.docx - q = strstr(p, name); - if(q) *q = '\0'; + // skip any whitespace after name=, ie. name = " + while(*p==' ' || *p=='\t') p++; - q = strrchr(p, ';'); - if(q) *q = '\0'; - q = strrchr(p, '"'); - if(q){ - *q = '\0'; - p = strchr(p, '"'); - if(p){ - p++; - } + // if there's a double quote after the equal symbol (=), ie. name*="utf-8.... + if(*p == '"'){ + p++; + q = strchr(p, '"'); + + if(q) *q = '\0'; } + else { + // no " after =, so split on ; + q = strchr(p, ';'); + if(q) *q = '\0'; + } + if(extended == 1){ encoding = p; diff --git a/unit_tests/check_parser_utils.c b/unit_tests/check_parser_utils.c index 4cc9e31..e457e78 100644 --- a/unit_tests/check_parser_utils.c +++ b/unit_tests/check_parser_utils.c @@ -97,10 +97,25 @@ {"Content-Type: image/png; name=\"Screenshot from 2015-11-10 10:07:13.png\"", "name", "Screenshot from 2015-11-10 10:07:13.png"}, {"Content-Disposition: attachment; filename=\"zzzzz Email Examples.zip\";", "name", "zzzzz Email Examples.zip"}, + {"Content-Type: application/msword; name*=\"iso-8859-1''Einverst%E4ndniserkl%E4rung_Kids-PKW_Familienname.doc\"", "name", "Einverständniserklärung_Kids-PKW_Familienname.doc"}, + {"Content-Type: application/msword; name*= \"iso-8859-1''Einverst%E4ndniserkl%E4rung_Kids-PKW_Familienname.doc\"", "name", "Einverständniserklärung_Kids-PKW_Familienname.doc"}, + + // This one sucks, and I don't think it's a proper definition + {"Content-Type: application/msword; filename*=utf-8''P;LAN%20Holden%204.docx;filename=\"P;LAN Holden 4.docx\"", "name", "P"}, + // Adding quotes makes it acceptable to the parser + {"Content-Type: application/msword; filename*=\"utf-8''P;LAN%20Holden%204.docx\";filename=\"P;LAN Holden 4.docx\"", "name", "P;LAN Holden 4.docx"}, + + {"Content-Type: null; name=\"toDev-Netengineering.png\"", "name", "toDev-Netengineering.png"}, + {"Content-Type: null; name=\"toDev-name-Netengineering.png\"", "name", "toDev-name-Netengineering.png"}, + {"Content-Type: null; name*=\"iso-8859-1''toDev-Netengineering.png\"", "name", "toDev-Netengineering.png"}, + {"Content-Type: null; name*=\"iso-8859-1''toDev-name-Netengineering.png\"", "name", "toDev-name-Netengineering.png"}, + {"Content-Type: null; name*=\"iso-8859-1''toDevnameNetengineering.png\"", "name", "toDevnameNetengineering.png"}, + {"Content-Type: null; name*=\"iso-8859-1''toDev-namE-Netengineering.png\"", "name", "toDev-namE-Netengineering.png"}, + {"foo: bar; title=Economy", "title", "Economy"}, {"foo: bar; title=\"US-$ rates\"", "title", "US-$ rates"}, {"foo: bar; title*=iso-8859-1'en'%A3%20rates", "title", "£ rates"}, - {"foo: bar; title*=UTF-8''%c2%a3%20and%20%e2%82%ac%20rates", "title", "£ and € rates"} + {"foo: bar; title*=UTF-8''%c2%a3%20and%20%e2%82%ac%20rates", "title", "£ and € rates"}, }; TEST_HEADER();