diff --git a/src/parser.c b/src/parser.c index 8ef39e3..0f56e3c 100644 --- a/src/parser.c +++ b/src/parser.c @@ -419,6 +419,7 @@ } + /* remove all HTML tags */ if(state->texthtml == 1 && state->message_state == MSG_BODY) markHTML(buf, state); if(state->message_state == MSG_BODY && state->qp == 1){ @@ -426,7 +427,8 @@ decodeQP(buf); } - decodeURL(buf); + /* I believe that we can live without this function call */ + //decodeURL(buf); if(state->texthtml == 1) decodeHTML(buf); diff --git a/src/parser_utils.c b/src/parser_utils.c index bfe9b91..ce8548d 100644 --- a/src/parser_utils.c +++ b/src/parser_utils.c @@ -306,9 +306,7 @@ if(p){ memset(state->qpbuf, 0, MAX_TOKEN_LEN); if(strlen(p) < MAX_TOKEN_LEN-1){ - //snprintf(state->qpbuf, MAX_TOKEN_LEN-1, "%s", p); memcpy(&(state->qpbuf[0]), p, MAX_TOKEN_LEN-1); - *p = '\0'; } @@ -334,9 +332,7 @@ if(buf[strlen(buf)-1] != '\n'){ p = strrchr(buf, ' '); if(p){ - //strncpy(state->miscbuf, p+1, MAX_TOKEN_LEN-1); memcpy(&(state->miscbuf[0]), p+1, MAX_TOKEN_LEN-1); - *p = '\0'; } } @@ -478,17 +474,10 @@ if(strncasecmp((char *)p, "http://", 7) == 0){ p += 7; url = 1; continue; } if(strncasecmp((char *)p, "https://", 8) == 0){ p += 8; url = 1; continue; } - if(url == 1 && (*p == '.' || *p == '-' || *p == '_' || *p == '/' || isalnum(*p)) ) continue; + if(url == 1 && (*p == '.' || *p == '-' || *p == '_' || *p == '/' || *p == '%' || *p == '?' || isalnum(*p)) ) continue; if(url == 1) url = 0; } - if(state->texthtml == 1 && state->message_state == MSG_BODY && strncmp((char *)p, "HTML*", 5) == 0){ - p += 5; - while(isspace(*p) == 0){ - p++; - } - } - if(delimiter_characters[(unsigned int)*p] != ' ') *p = ' '; /* we MUSTN'T convert it to lowercase in the 'else' case, because it breaks utf-8 encoding! */