X-Git-Url: http://www.privoxy.org/gitweb/?a=blobdiff_plain;f=parsers.c;h=f84cf890d678899209e2875336fd5955e00018d5;hb=ddac7695906a4e739e5bf0a0628142e68601c8ff;hp=fd62f9d74c53a99faac671becfc42014719ce65e;hpb=dfd5a3ced69851300f203f904334d3e44b004176;p=privoxy.git diff --git a/parsers.c b/parsers.c index fd62f9d7..f84cf890 100644 --- a/parsers.c +++ b/parsers.c @@ -1,4 +1,4 @@ -const char parsers_rcs[] = "$Id: parsers.c,v 1.81 2006/12/31 22:21:33 fabiankeil Exp $"; +const char parsers_rcs[] = "$Id: parsers.c,v 1.87 2007/01/31 16:21:38 fabiankeil Exp $"; /********************************************************************* * * File : $Source: /cvsroot/ijbswa/current/parsers.c,v $ @@ -18,7 +18,7 @@ const char parsers_rcs[] = "$Id: parsers.c,v 1.81 2006/12/31 22:21:33 fabiankeil * `client_if_none_match', `get_destination_from_headers', * `parse_header_time' and `server_set_cookie'. * - * Copyright : Written by and Copyright (C) 2001-2006 the SourceForge + * Copyright : Written by and Copyright (C) 2001-2007 the SourceForge * Privoxy team. http://www.privoxy.org/ * * Based on the Internet Junkbuster originally written @@ -45,6 +45,42 @@ const char parsers_rcs[] = "$Id: parsers.c,v 1.81 2006/12/31 22:21:33 fabiankeil * * Revisions : * $Log: parsers.c,v $ + * Revision 1.87 2007/01/31 16:21:38 fabiankeil + * Search for Max-Forwards headers case-insensitive, + * don't generate the "501 unsupported" message for invalid + * Max-Forwards values and don't increase negative ones. + * + * Revision 1.86 2007/01/30 13:05:26 fabiankeil + * - Let server_set_cookie() check the expiration date + * of cookies and don't touch the ones that are already + * expired. Fixes problems with low quality web applications + * as described in BR 932612. + * + * - Adjust comment in client_max_forwards to reality; + * remove invalid Max-Forwards headers. + * + * Revision 1.85 2007/01/26 15:33:46 fabiankeil + * Stop filter_header() from unintentionally removing + * empty header lines that were enlisted by the continue + * hack. + * + * Revision 1.84 2007/01/24 12:56:52 fabiankeil + * - Repeat the request URL before logging any headers. + * Makes reading the log easier in case of simultaneous requests. + * - If there are more than one Content-Type headers in one request, + * use the first one and remove the others. + * - Remove "newval" variable in server_content_type(). + * It's only used once. + * + * Revision 1.83 2007/01/12 15:03:02 fabiankeil + * Correct a cast, check inflateEnd() exit code + * to see if we have to, replace sprintf calls + * with snprintf. + * + * Revision 1.82 2007/01/01 19:36:37 fabiankeil + * Integrate a modified version of Wil Mahan's + * zlib patch (PR #895531). + * * Revision 1.81 2006/12/31 22:21:33 fabiankeil * Skip empty filter files in filter_header() * but don't ignore the ones that come afterwards. @@ -816,6 +852,8 @@ jb_err add_to_iob(struct client_state *csp, char *buf, int n) jb_err decompress_iob(struct client_state *csp) { char *buf; /* new, uncompressed buffer */ + char *cur; /* Current iob position (to keep the original + * iob->cur unmodified if we return early) */ size_t bufsize; /* allocated size of the new buffer */ size_t skip_size; /* Number of bytes at the beginning of the iob that we should NOT decompress. */ @@ -825,6 +863,8 @@ jb_err decompress_iob(struct client_state *csp) bufsize = csp->iob->size; skip_size = (size_t)(csp->iob->cur - csp->iob->buf); + cur = csp->iob->cur; + if (bufsize < 10) { /* @@ -849,16 +889,16 @@ jb_err decompress_iob(struct client_state *csp) * Strip off the gzip header. Please see RFC 1952 for more * explanation of the appropriate fields. */ - if ((*csp->iob->cur++ != (char)0x1f) - || (*csp->iob->cur++ != (char)0x8b) - || (*csp->iob->cur++ != Z_DEFLATED)) + if ((*cur++ != (char)0x1f) + || (*cur++ != (char)0x8b) + || (*cur++ != Z_DEFLATED)) { log_error (LOG_LEVEL_ERROR, "Invalid gzip header when decompressing"); return JB_ERR_COMPRESS; } else { - int flags = *csp->iob->cur++; + int flags = *cur++; /* * XXX: These magic numbers should be replaced * with macros to give a better idea what they do. @@ -866,10 +906,10 @@ jb_err decompress_iob(struct client_state *csp) if (flags & 0xe0) { /* The gzip header has reserved bits set; bail out. */ - log_error (LOG_LEVEL_ERROR, "Invalid gzip header when decompressing"); + log_error (LOG_LEVEL_ERROR, "Invalid gzip header flags when decompressing"); return JB_ERR_COMPRESS; } - csp->iob->cur += 6; + cur += 6; /* Skip extra fields if necessary. */ if (flags & 0x04) @@ -878,32 +918,73 @@ jb_err decompress_iob(struct client_state *csp) * Skip a given number of bytes, specified * as a 16-bit little-endian value. */ - csp->iob->cur += *csp->iob->cur++ + (*csp->iob->cur++ << 8); + /* + * XXX: This code used to be: + * + * csp->iob->cur += *csp->iob->cur++ + (*csp->iob->cur++ << 8); + * + * which I had to change into: + * + * cur += *cur++ + (*cur++ << 8); + * + * at which point gcc43 finally noticed that the value + * of cur is undefined (it depends on which of the + * summands is evaluated first). + * + * I haven't come across a site where this + * code is actually executed yet, but I hope + * it works anyway. + */ + int skip_bytes; + skip_bytes = *cur++; + skip_bytes = *cur++ << 8; + + assert(skip_bytes == *csp->iob->cur-2 + ((*csp->iob->cur-1) << 8)); + + /* + * The number of bytes to skip should be positive + * and we'd like to stay in the buffer. + */ + if((skip_bytes < 0) || (skip_bytes >= (csp->iob->eod - cur))) + { + log_error (LOG_LEVEL_ERROR, + "Unreasonable amount of bytes to skip (%d). Stopping decompression", + skip_bytes); + return JB_ERR_COMPRESS; + } + log_error (LOG_LEVEL_INFO, + "Skipping %d bytes for gzip compression. Does this sound right?", + skip_bytes); + cur += skip_bytes; } /* Skip the filename if necessary. */ if (flags & 0x08) { /* A null-terminated string follows. */ - while (*csp->iob->cur++); + while (*cur++); } /* Skip the comment if necessary. */ if (flags & 0x10) { - while (*csp->iob->cur++); + while (*cur++); } /* Skip the CRC if necessary. */ if (flags & 0x02) { - csp->iob->cur += 2; + cur += 2; } } } else if (csp->content_type & CT_DEFLATE) { - log_error (LOG_LEVEL_INFO, "Decompressing deflated iob: %d", *csp->iob->cur); + /* + * XXX: The debug level should be lowered + * before the next stable release. + */ + log_error (LOG_LEVEL_INFO, "Decompressing deflated iob: %d", *cur); /* * In theory (that is, according to RFC 1950), deflate-compressed * data should begin with a two-byte zlib header and have an @@ -929,8 +1010,8 @@ jb_err decompress_iob(struct client_state *csp) } /* Set up the fields required by zlib. */ - zstr.next_in = (Bytef *)csp->iob->cur; - zstr.avail_in = (unsigned long)(csp->iob->eod - csp->iob->cur); + zstr.next_in = (Bytef *)cur; + zstr.avail_in = (unsigned int)(csp->iob->eod - cur); zstr.zalloc = Z_NULL; zstr.zfree = Z_NULL; zstr.opaque = Z_NULL; @@ -1031,7 +1112,22 @@ jb_err decompress_iob(struct client_state *csp) } } - inflateEnd(&zstr); + if (Z_STREAM_ERROR == inflateEnd(&zstr)) + { + log_error(LOG_LEVEL_ERROR, + "Inconsistent stream state after decompression: %s", zstr.msg); + /* + * XXX: Intentionally no return. + * + * According to zlib.h, Z_STREAM_ERROR is returned + * "if the stream state was inconsistent". + * + * I assume in this case inflate()'s status + * would also be something different than Z_STREAM_END + * so this check should be redundant, but lets see. + */ + } + if (status != Z_STREAM_END) { /* We failed to decompress the stream. */ @@ -1229,6 +1325,7 @@ char *sed(const struct parsers pats[], if (first_run) /* Parse and print */ { + log_error(LOG_LEVEL_HEADER, "scanning headers for: %s", csp->http->url); for (v = pats; (err == JB_ERR_OK) && (v->str != NULL) ; v++) { for (p = csp->headers->first; (err == JB_ERR_OK) && (p != NULL) ; p = p->next) @@ -1470,7 +1567,12 @@ jb_err filter_header(struct client_state *csp, char **header) } } - if ( 0 == size ) + /* + * Additionally checking for hits is important because if + * the continue hack is triggered, server headers can + * arrive empty to separate multiple heads from each other. + */ + if ((0 == size) && hits) { log_error(LOG_LEVEL_HEADER, "Removing empty header %s", *header); freez(*header); @@ -1540,12 +1642,14 @@ jb_err crunch_server_header(struct client_state *csp, char **header) return JB_ERR_OK; } + + /********************************************************************* * * Function : server_content_type * * Description : Set the content-type for filterable types (text/.*, - * javascript and image/gif) unless filtering has been + * .*xml.*, javascript and image/gif) unless filtering has been * forbidden (CT_TABOO) while parsing earlier headers. * NOTE: Since text/plain is commonly used by web servers * for files whose correct type is unknown, we don't @@ -1564,11 +1668,22 @@ jb_err crunch_server_header(struct client_state *csp, char **header) *********************************************************************/ jb_err server_content_type(struct client_state *csp, char **header) { - const char *newval; - - newval = csp->action->string[ACTION_STRING_CONTENT_TYPE]; + /* Remove header if it isn't the first Content-Type header */ + if(csp->content_type && (csp->content_type != CT_TABOO)) + { + /* + * Another, slightly slower, way to see if + * we already parsed another Content-Type header. + */ + assert(NULL != get_header_value(csp->headers, "Content-Type:")); + + log_error(LOG_LEVEL_ERROR, + "Multiple Content-Type headers. Removing and ignoring: \'%s\'", + *header); + freez(*header); - assert(!csp->content_type || (csp->content_type == CT_TABOO)); + return JB_ERR_OK; + } if (!(csp->content_type & CT_TABOO)) { @@ -1622,12 +1737,11 @@ jb_err server_content_type(struct client_state *csp, char **header) { freez(*header); *header = strdup("Content-Type: "); - string_append(header, newval); + string_append(header, csp->action->string[ACTION_STRING_CONTENT_TYPE]); if (header == NULL) { - log_error(LOG_LEVEL_HEADER, - "Insufficient memory. Content-Type crunched without replacement!"); + log_error(LOG_LEVEL_HEADER, "Insufficient memory to replace Content-Type!"); return JB_ERR_MEMORY; } log_error(LOG_LEVEL_HEADER, "Modified: %s!", *header); @@ -1733,8 +1847,8 @@ jb_err server_transfer_coding(struct client_state *csp, char **header) jb_err server_content_encoding(struct client_state *csp, char **header) { #ifdef FEATURE_ZLIB - /* XXX: Why would we modify the content if it was taboo? */ - if ((csp->flags & CSP_FLAG_MODIFIED) && !(csp->content_type & CT_TABOO)) + if ((csp->flags & CSP_FLAG_MODIFIED) + && (csp->content_type & (CT_GZIP | CT_DEFLATE))) { /* * We successfully decompressed the content, @@ -1800,6 +1914,7 @@ jb_err server_content_encoding(struct client_state *csp, char **header) *********************************************************************/ jb_err server_content_length(struct client_state *csp, char **header) { + const size_t max_header_length = 80; if (csp->content_length != 0) /* Content length could have been modified */ { /* @@ -1807,15 +1922,16 @@ jb_err server_content_length(struct client_state *csp, char **header) * is different than the original value? */ freez(*header); - *header = (char *) zalloc(100); + *header = (char *) zalloc(max_header_length); if (*header == NULL) { return JB_ERR_MEMORY; } - sprintf(*header, "Content-Length: %d", (int) csp->content_length); - - log_error(LOG_LEVEL_HEADER, "Adjust Content-Length to %d", (int) csp->content_length); + snprintf(*header, max_header_length, "Content-Length: %d", + (int)csp->content_length); + log_error(LOG_LEVEL_HEADER, "Adjusted Content-Length to %d", + (int)csp->content_length); } return JB_ERR_OK; @@ -2308,11 +2424,13 @@ jb_err client_accept_language(struct client_state *csp, char **header) if (*header == NULL) { - log_error(LOG_LEVEL_ERROR, " Insufficent memory. Accept-Language header crunched without replacement."); + log_error(LOG_LEVEL_ERROR, + "Insufficent memory. Accept-Language header crunched without replacement."); } else { - log_error(LOG_LEVEL_HEADER, "Accept-Language header crunched and replaced with: %s", *header); + log_error(LOG_LEVEL_HEADER, + "Accept-Language header crunched and replaced with: %s", *header); } } return (*header == NULL) ? JB_ERR_MEMORY : JB_ERR_OK; @@ -2581,23 +2699,42 @@ jb_err client_x_forwarded(struct client_state *csp, char **header) *********************************************************************/ jb_err client_max_forwards(struct client_state *csp, char **header) { - unsigned int max_forwards; + int max_forwards; - if ((0 == strcmpic(csp->http->gpc, "trace")) - || (0 == strcmpic(csp->http->gpc, "options"))) + if ((0 == strcmpic(csp->http->gpc, "trace")) || + (0 == strcmpic(csp->http->gpc, "options"))) { - if (1 == sscanf(*header, "Max-Forwards: %u", &max_forwards)) + assert(*(*header+12) == ':'); + if (1 == sscanf(*header+12, ": %u", &max_forwards)) { - if (max_forwards-- >= 1) + if (max_forwards > 0) { - sprintf(*header, "Max-Forwards: %u", max_forwards); - log_error(LOG_LEVEL_HEADER, "Max forwards of %s request now %d", csp->http->gpc, max_forwards); + snprintf(*header, strlen(*header)+1, "Max-Forwards: %u", --max_forwards); + log_error(LOG_LEVEL_HEADER, "Max-Forwards value for %s request reduced to %u.", + csp->http->gpc, max_forwards); + } + else if (max_forwards < 0) + { + log_error(LOG_LEVEL_ERROR, "Crunching invalid header: %s", *header); + freez(*header); } else { - log_error(LOG_LEVEL_ERROR, "Non-intercepted %s request with Max-Forwards zero!", csp->http->gpc); + /* + * Not supposed to be reached. direct_response() which + * was already called earlier in chat() should have + * intercepted the request. + */ + log_error(LOG_LEVEL_ERROR, + "Non-intercepted %s request with Max-Forwards zero!", csp->http->gpc); + assert(max_forwards != 0); } } + else + { + log_error(LOG_LEVEL_ERROR, "Crunching invalid header: %s", *header); + freez(*header); + } } return JB_ERR_OK; @@ -2855,7 +2992,8 @@ jb_err client_x_filter(struct client_state *csp, char **header) { if (csp->action->flags & ACTION_FORCE_TEXT_MODE) { - log_error(LOG_LEVEL_HEADER, "force-text-mode overruled the client's request to fetch without filtering!"); + log_error(LOG_LEVEL_HEADER, + "force-text-mode overruled the client's request to fetch without filtering!"); } else { @@ -3160,8 +3298,13 @@ jb_err server_http(struct client_state *csp, char **header) * Function : server_set_cookie * * Description : Handle the server "cookie" header properly. - * Log cookie to the jar file. Then "crunch" it, - * or accept it. Called from `sed'. + * Log cookie to the jar file. Then "crunch", + * accept or rewrite it to a session cookie. + * Called from `sed'. + * + * TODO: Allow the user to specify a new expiration + * time to cause the cookie to expire even before the + * browser is closed. * * Parameters : * 1 : csp = Current client state (buffers, headers, etc...) @@ -3176,6 +3319,12 @@ jb_err server_http(struct client_state *csp, char **header) *********************************************************************/ jb_err server_set_cookie(struct client_state *csp, char **header) { + time_t now; + time_t cookie_time; + struct tm tm_now; + struct tm tm_cookie; + time(&now); + #ifdef FEATURE_COOKIE_JAR if (csp->config->jar) { @@ -3186,9 +3335,7 @@ jb_err server_set_cookie(struct client_state *csp, char **header) * the %z field in strftime() */ char tempbuf[ BUFFER_SIZE ]; - time_t now; - struct tm tm_now; - time (&now); + #ifdef HAVE_LOCALTIME_R tm_now = *localtime_r(&now, &tm_now); #elif FEATURE_PTHREAD @@ -3249,22 +3396,122 @@ jb_err server_set_cookie(struct client_state *csp, char **header) next_tag = cur_tag + strlen(cur_tag); } - /* Is this the "Expires" tag? */ + /* + * Check the expiration date to see + * if the cookie is still valid, if yes, + * rewrite it to a session cookie. + */ if (strncmpic(cur_tag, "expires=", 8) == 0) { - /* Delete the tag by copying the rest of the string over it. - * (Note that we cannot just use "strcpy(cur_tag, next_tag)", - * since the behaviour of strcpy is undefined for overlapping - * strings.) + char *match; + /* + * Try the valid time formats we know about. + * + * XXX: Maybe the log messages should be removed + * for the next stable release. They just exist to + * see which time format gets the most hits and + * should be checked for first. */ - memmove(cur_tag, next_tag, strlen(next_tag) + 1); + if (NULL != (match = strptime(cur_tag, "expires=%a, %e-%b-%y %H:%M:%S ", &tm_cookie))) + { + log_error(LOG_LEVEL_HEADER, + "cookie \'%s\' send by %s appears to be using time format 1.", + csp->http->url, *header); + } + else if (NULL != (match = strptime(cur_tag, "expires=%A, %e-%b-%Y %H:%M:%S ", &tm_cookie))) + { + log_error(LOG_LEVEL_HEADER, + "cookie \'%s\' send by %s appears to be using time format 2.", + csp->http->url, *header); - /* That changed the header, need to issue a log message */ - changed = 1; + } + else if (NULL != (match = strptime(cur_tag, "expires=%a, %e-%b-%Y %H:%M:%S ", &tm_cookie))) + { + log_error(LOG_LEVEL_HEADER, + "cookie \'%s\' send by %s appears to be using time format 3.", + csp->http->url, *header); + } + + /* Did any of them match? */ + if (NULL == match) + { + /* + * Nope, treat it as if it was still valid. + * + * XXX: Should we remove the whole cookie instead? + */ + log_error(LOG_LEVEL_ERROR, + "Can't parse %s. Unsupported time format?", cur_tag); + memmove(cur_tag, next_tag, strlen(next_tag) + 1); + changed = 1; + } + else + { + /* + * Yes. Check if the cookie is still valid. + * + * If the cookie is already expired it's probably + * a delete cookie and even if it isn't, the browser + * will discard it anyway. + */ + + /* + * XXX: timegm() isn't available on some AmigaOS + * versions and our replacement doesn't work. + * + * Our options are to either: + * + * - disable session-cookies-only completely if timegm + * is missing, + * + * - to simply remove all expired tags, like it has + * been done until Privoxy 3.0.6 and to live with + * the consequence that it can cause login/logout + * problems on servers that don't validate their + * input properly, or + * + * - to replace it with mktime in which + * case there is a slight chance of valid cookies + * passing as already expired. + * + * This is the way it's currently done and it's not + * as bad as it sounds. If the missing GMT offset is + * enough to change the result of the expiration check + * the cookie will be only valid for a few hours + * anyway, which in many cases will be shorter + * than a browser session. + */ + cookie_time = timegm(&tm_cookie); + if (cookie_time - now < 0) + { + log_error(LOG_LEVEL_HEADER, + "Cookie \'%s\' is already expired and can pass unmodified.", *header); + /* Just in case some clown sets more then one expiration date */ + cur_tag = next_tag; + } + else + { + log_error(LOG_LEVEL_HEADER, + "Cookie \'%s\' is still valid and has to be rewritten.", *header); + + /* + * Delete the tag by copying the rest of the string over it. + * (Note that we cannot just use "strcpy(cur_tag, next_tag)", + * since the behaviour of strcpy is undefined for overlapping + * strings.) + */ + memmove(cur_tag, next_tag, strlen(next_tag) + 1); + + /* That changed the header, need to issue a log message */ + changed = 1; + + /* + * Note that the next tag has now been moved to *cur_tag, + * so we do not need to update the cur_tag pointer. + */ + } + } - /* Note that the next tag has now been moved to *cur_tag, - * so we do not need to update the cur_tag pointer. - */ } else { @@ -3275,7 +3522,9 @@ jb_err server_set_cookie(struct client_state *csp, char **header) if (changed) { - log_error(LOG_LEVEL_HEADER, "Changed cookie to a temporary one."); + assert(NULL != *header); + log_error(LOG_LEVEL_HEADER, "Cookie rewritten to a temporary one: %s", + *header); } }