X-Git-Url: http://www.privoxy.org/gitweb/?a=blobdiff_plain;f=parsers.c;h=4aa786e3e38d76c672002bb104a8b379a44a71f9;hb=4fa929237d8a85915f2c58071697301af5192f96;hp=317eee430b6098030be6755e789dceefec1ced5e;hpb=ac79da36ceabd35970b6a8ce92b794be37cf1427;p=privoxy.git diff --git a/parsers.c b/parsers.c index 317eee43..4aa786e3 100644 --- a/parsers.c +++ b/parsers.c @@ -1,4 +1,4 @@ -const char parsers_rcs[] = "$Id: parsers.c,v 1.88 2007/02/07 11:27:12 fabiankeil Exp $"; +const char parsers_rcs[] = "$Id: parsers.c,v 1.92 2007/03/05 13:25:32 fabiankeil Exp $"; /********************************************************************* * * File : $Source: /cvsroot/ijbswa/current/parsers.c,v $ @@ -10,13 +10,12 @@ const char parsers_rcs[] = "$Id: parsers.c,v 1.88 2007/02/07 11:27:12 fabiankeil * `client_uagent', `client_x_forwarded', * `client_x_forwarded_adder', `client_xtra_adder', * `content_type', `crumble', `destroy_list', `enlist', - * `flush_socket', ``get_header', `sed', `filter_server_header' - * `filter_client_header', `filter_header', `crunch_server_header', + * `flush_socket', ``get_header', `sed', `filter_header' * `server_content_encoding', `server_content_disposition', * `server_last_modified', `client_accept_language', * `crunch_client_header', `client_if_modified_since', * `client_if_none_match', `get_destination_from_headers', - * `parse_header_time' and `server_set_cookie'. + * `parse_header_time', `decompress_iob' and `server_set_cookie'. * * Copyright : Written by and Copyright (C) 2001-2007 the SourceForge * Privoxy team. http://www.privoxy.org/ @@ -45,6 +44,26 @@ const char parsers_rcs[] = "$Id: parsers.c,v 1.88 2007/02/07 11:27:12 fabiankeil * * Revisions : * $Log: parsers.c,v $ + * Revision 1.92 2007/03/05 13:25:32 fabiankeil + * - Cosmetical changes for LOG_LEVEL_RE_FILTER messages. + * - Handle "Cookie:" and "Connection:" headers a bit smarter + * (don't crunch them just to recreate them later on). 
+ * - Add another non-standard time format for the cookie + * expiration date detection. + * - Fix a valgrind warning. + * + * Revision 1.91 2007/02/24 12:27:32 fabiankeil + * Improve cookie expiration date detection. + * + * Revision 1.90 2007/02/08 19:12:35 fabiankeil + * Don't run server_content_length() the first time + * sed() parses server headers; only adjust the + * Content-Length header if the page was modified. + * + * Revision 1.89 2007/02/07 16:52:11 fabiankeil + * Fix log messages regarding the cookie time format + * (cookie and request URL were mixed up). + * * Revision 1.88 2007/02/07 11:27:12 fabiankeil * - Let decompress_iob() * - not corrupt the content if decompression fails @@ -672,23 +691,22 @@ const struct parsers client_patterns[] = { { "Host:", 5, client_host }, { "if-modified-since:", 18, client_if_modified_since }, { "Keep-Alive:", 11, crumble }, - { "connection:", 11, crumble }, + { "connection:", 11, connection }, { "proxy-connection:", 17, crumble }, { "max-forwards:", 13, client_max_forwards }, { "Accept-Language:", 16, client_accept_language }, { "if-none-match:", 14, client_if_none_match }, { "X-Filter:", 9, client_x_filter }, { "*", 0, crunch_client_header }, - { "*", 0, filter_client_header }, + { "*", 0, filter_header }, { NULL, 0, NULL } }; const struct parsers server_patterns[] = { { "HTTP", 4, server_http }, { "set-cookie:", 11, server_set_cookie }, - { "connection:", 11, crumble }, + { "connection:", 11, connection }, { "Content-Type:", 13, server_content_type }, - { "Content-Length:", 15, server_content_length }, { "Content-MD5:", 12, server_content_md5 }, { "Content-Encoding:", 17, server_content_encoding }, { "Transfer-Encoding:", 18, server_transfer_coding }, @@ -696,7 +714,7 @@ const struct parsers server_patterns[] = { { "content-disposition:", 20, server_content_disposition }, { "Last-Modified:", 14, server_last_modified }, { "*", 0, crunch_server_header }, - { "*", 0, filter_server_header }, + { "*", 0, filter_header 
}, { NULL, 0, NULL } }; @@ -868,6 +886,9 @@ jb_err decompress_iob(struct client_state *csp) int status; /* return status of the inflate() call */ z_stream zstr; /* used by calls to zlib */ + assert(csp->iob->cur - csp->iob->buf > 0); + assert(csp->iob->eod - csp->iob->cur > 0); + bufsize = csp->iob->size; skip_size = (size_t)(csp->iob->cur - csp->iob->buf); @@ -1393,64 +1414,12 @@ char *sed(const struct parsers pats[], /* here begins the family of parser functions that reformat header lines */ -/********************************************************************* - * - * Function : filter_server_header - * - * Description : Checks if server header filtering is enabled. - * If it is, filter_header is called to do the work. - * - * Parameters : - * 1 : csp = Current client state (buffers, headers, etc...) - * 2 : header = On input, pointer to header to modify. - * On output, pointer to the modified header, or NULL - * to remove the header. This function frees the - * original string if necessary. - * - * Returns : JB_ERR_OK on success and always succeeds - * - *********************************************************************/ -jb_err filter_server_header(struct client_state *csp, char **header) -{ - if (csp->action->flags & ACTION_FILTER_SERVER_HEADERS) - { - filter_header(csp, header); - } - return(JB_ERR_OK); -} - -/********************************************************************* - * - * Function : filter_client_header - * - * Description : Checks if client header filtering is enabled. - * If it is, filter_header is called to do the work. - * - * Parameters : - * 1 : csp = Current client state (buffers, headers, etc...) - * 2 : header = On input, pointer to header to modify. - * On output, pointer to the modified header, or NULL - * to remove the header. This function frees the - * original string if necessary. 
- * - * Returns : JB_ERR_OK on success and always succeeds - * - *********************************************************************/ -jb_err filter_client_header(struct client_state *csp, char **header) -{ - if (csp->action->flags & ACTION_FILTER_CLIENT_HEADERS) - { - filter_header(csp, header); - } - return(JB_ERR_OK); -} - /********************************************************************* * * Function : filter_header * * Description : Executes all text substitutions from all applying - * +filter actions on the header. + * +(server|client)-header-filter actions on the header. * Most of the code was copied from pcrs_filter_response, * including the rather short variable names * @@ -1478,6 +1447,19 @@ jb_err filter_header(struct client_state *csp, char **header) struct list_entry *filtername; int i, found_filters = 0; + int wanted_filter_type; + int multi_action_index; + + if (csp->flags & CSP_FLAG_CLIENT_HEADER_PARSING_DONE) + { + wanted_filter_type = FT_SERVER_HEADER_FILTER; + multi_action_index = ACTION_MULTI_SERVER_HEADER_FILTER; + } + else + { + wanted_filter_type = FT_CLIENT_HEADER_FILTER; + multi_action_index = ACTION_MULTI_CLIENT_HEADER_FILTER; + } /* * Need to check the set of re_filterfiles... 
@@ -1524,7 +1506,13 @@ jb_err filter_header(struct client_state *csp, char **header) */ for (b = fl->f; b; b = b->next) { - for (filtername = csp->action->multi[ACTION_MULTI_FILTER]->first; + if (b->type != wanted_filter_type) + { + /* Skip other filter types */ + continue; + } + + for (filtername = csp->action->multi[multi_action_index]->first; filtername ; filtername = filtername->next) { if (strcmp(b->name, filtername->str) == 0) @@ -1537,7 +1525,7 @@ jb_err filter_header(struct client_state *csp, char **header) continue; } - log_error(LOG_LEVEL_RE_FILTER, "re_filtering %s (size %d) with filter %s...", + log_error(LOG_LEVEL_RE_FILTER, "filtering \'%s\' (size %d) with \'%s\' ...", *header, size, b->name); /* Apply all jobs from the joblist */ @@ -1568,7 +1556,7 @@ jb_err filter_header(struct client_state *csp, char **header) } } } - log_error(LOG_LEVEL_RE_FILTER, " ...produced %d hits (new size %d).", current_hits, size); + log_error(LOG_LEVEL_RE_FILTER, "... produced %d hits (new size %d).", current_hits, size); hits += current_hits; } } @@ -1590,6 +1578,56 @@ jb_err filter_header(struct client_state *csp, char **header) } +/********************************************************************* + * + * Function : connection + * + * Description : Makes sure that the value of the Connection: header + * is "close" and signals connection_close_adder + * to do nothing. + * + * Parameters : + * 1 : csp = Current client state (buffers, headers, etc...) + * 2 : header = On input, pointer to header to modify. + * On output, pointer to the modified header, or NULL + * to remove the header. This function frees the + * original string if necessary. + * + * Returns : JB_ERR_OK on success, or + * JB_ERR_MEMORY on out-of-memory error. + * + *********************************************************************/ +jb_err connection(struct client_state *csp, char **header) +{ + char *old_header = *header; + + /* Do we have a 'Connection: close' header? 
*/ + if (strcmpic(*header, "Connection: close")) + { + /* No, create one */ + *header = strdup("Connection: close"); + if (*header == NULL) + { + return JB_ERR_MEMORY; + } + log_error(LOG_LEVEL_HEADER, "Replaced: \'%s\' with \'%s\'", old_header, *header); + freez(old_header); + } + + /* Signal connection_close_adder() to return early. */ + if (csp->flags & CSP_FLAG_CLIENT_HEADER_PARSING_DONE) + { + csp->flags |= CSP_FLAG_SERVER_CONNECTION_CLOSE_SET; + } + else + { + csp->flags |= CSP_FLAG_CLIENT_CONNECTION_CLOSE_SET; + } + + return JB_ERR_OK; +} + + /********************************************************************* * * Function : crumble @@ -1634,13 +1672,13 @@ jb_err crumble(struct client_state *csp, char **header) jb_err crunch_server_header(struct client_state *csp, char **header) { const char *crunch_pattern; - /*Is there a header to crunch*/ + /* Do we feel like crunching? */ if ((csp->action->flags & ACTION_CRUNCH_SERVER_HEADER)) { crunch_pattern = csp->action->string[ACTION_STRING_SERVER_HEADER]; - /*Is the current header the lucky one?*/ + /* Is the current header the lucky one? */ if (strstr(*header, crunch_pattern)) { log_error(LOG_LEVEL_HEADER, "Crunching server header: %s (contains: %s)", *header, crunch_pattern); @@ -1923,12 +1961,10 @@ jb_err server_content_length(struct client_state *csp, char **header) jb_err server_content_length(struct client_state *csp, char **header) { const size_t max_header_length = 80; - if (csp->content_length != 0) /* Content length could have been modified */ + + /* Regenerate header if the content was modified. */ + if (csp->flags & CSP_FLAG_MODIFIED) { - /* - * XXX: Shouldn't we check if csp->content_length - * is different than the original value? 
- */ freez(*header); *header = (char *) zalloc(max_header_length); if (*header == NULL) @@ -2177,6 +2213,7 @@ jb_err server_last_modified(struct client_state *csp, char **header) return JB_ERR_OK; } + /********************************************************************* * * Function : client_accept_encoding @@ -2464,13 +2501,13 @@ jb_err client_accept_language(struct client_state *csp, char **header) jb_err crunch_client_header(struct client_state *csp, char **header) { const char *crunch_pattern; - /*Is there a header to crunch*/ - + + /* Do we feel like crunching? */ if ((csp->action->flags & ACTION_CRUNCH_CLIENT_HEADER)) { crunch_pattern = csp->action->string[ACTION_STRING_CLIENT_HEADER]; - /*Is the current header the lucky one?*/ + /* Is the current header the lucky one? */ if (strstr(*header, crunch_pattern)) { log_error(LOG_LEVEL_HEADER, "Crunching client header: %s (contains: %s)", *header, crunch_pattern); @@ -2606,9 +2643,10 @@ jb_err client_from(struct client_state *csp, char **header) * * Function : client_send_cookie * - * Description : Handle the "cookie" header properly. Called from `sed'. - * If cookie is accepted, add it to the cookie_list, - * else we crunch it. Mmmmmmmmmmm ... cookie ...... + * Description : Crunches the "cookie" header if necessary. + * Called from `sed'. + * + * XXX: Stupid name, doesn't send squat. * * Parameters : * 1 : csp = Current client state (buffers, headers, etc...) 
@@ -2623,25 +2661,13 @@ jb_err client_from(struct client_state *csp, char **header) *********************************************************************/ jb_err client_send_cookie(struct client_state *csp, char **header) { - jb_err result = JB_ERR_OK; - - if ((csp->action->flags & ACTION_NO_COOKIE_READ) == 0) + if (csp->action->flags & ACTION_NO_COOKIE_READ) { - /* strlen("cookie: ") == 8 */ - result = enlist(csp->cookie_list, *header + 8); - } - else - { - log_error(LOG_LEVEL_HEADER, "Crunched outgoing cookie -- yum!"); + log_error(LOG_LEVEL_HEADER, "Crunched outgoing cookie: %s", *header); + freez(*header); } - /* - * Always remove the cookie here. The cookie header - * will be sent at the end of the header. - */ - freez(*header); - - return result; + return JB_ERR_OK; } @@ -2822,6 +2848,9 @@ jb_err client_host(struct client_state *csp, char **header) csp->http->hostport, csp->http->host, csp->http->port); } + /* Signal client_host_adder() to return right away */ + csp->flags |= CSP_FLAG_HOST_HEADER_IS_SET; + return JB_ERR_OK; } @@ -3038,8 +3067,16 @@ jb_err client_host_adder(struct client_state *csp) char *p; jb_err err; + if (csp->flags & CSP_FLAG_HOST_HEADER_IS_SET) + { + /* Header already set by the client, nothing to do. */ + return JB_ERR_OK; + } + if ( !csp->http->hostport || !*(csp->http->hostport)) { + /* XXX: When does this happen and why is it OK? */ + log_error(LOG_LEVEL_INFO, "Weirdness in client_host_adder detected and ignored."); return JB_ERR_OK; } @@ -3055,6 +3092,7 @@ jb_err client_host_adder(struct client_state *csp) p = csp->http->hostport; } + /* XXX: Just add it, we already made sure that it will be unique */ log_error(LOG_LEVEL_HEADER, "addh-unique: Host: %s", p); err = enlist_unique_header(csp->headers, "Host", p); return err; @@ -3068,6 +3106,8 @@ jb_err client_host_adder(struct client_state *csp) * * Description : Used in the add_client_headers list. Called from `sed'. * + * XXX: Remove csp->cookie_list which is no longer used. 
+ * * Parameters : * 1 : csp = Current client state (buffers, headers, etc...) * @@ -3148,6 +3188,8 @@ jb_err client_cookie_adder(struct client_state *csp) *********************************************************************/ jb_err client_accept_encoding_adder(struct client_state *csp) { + assert(0); /* Not in use */ + if ( ((csp->action->flags & ACTION_NO_COMPRESSION) != 0) && (!strcmpic(csp->http->ver, "HTTP/1.1")) ) { @@ -3243,9 +3285,10 @@ jb_err client_x_forwarded_adder(struct client_state *csp) * * Function : connection_close_adder * - * Description : Adds a "Connection: close" header to csp->headers - * as a temporary fix for the needed but missing HTTP/1.1 - * support. Called from `sed'. + * Description : "Temporary" fix for the needed but missing HTTP/1.1 + * support. Adds a "Connection: close" header to csp->headers + * unless the header was already present. Called from `sed'. + * * FIXME: This whole function shouldn't be neccessary! * * Parameters : @@ -3257,7 +3300,27 @@ jb_err client_x_forwarded_adder(struct client_state *csp) *********************************************************************/ jb_err connection_close_adder(struct client_state *csp) { + const unsigned int flags = csp->flags; + + /* + * Return right away if + * + * - we're parsing server headers and the server header + * "Connection: close" is already set, or if + * + * - we're parsing client headers and the client header + * "Connection: close" is already set. 
+ */ + if ((flags & CSP_FLAG_CLIENT_HEADER_PARSING_DONE + && flags & CSP_FLAG_SERVER_CONNECTION_CLOSE_SET) + ||(!(flags & CSP_FLAG_CLIENT_HEADER_PARSING_DONE) + && flags & CSP_FLAG_CLIENT_CONNECTION_CLOSE_SET)) + { + return JB_ERR_OK; + } + log_error(LOG_LEVEL_HEADER, "Adding: Connection: close"); + return enlist(csp->headers, "Connection: close"); } @@ -3285,6 +3348,9 @@ jb_err connection_close_adder(struct client_state *csp) *********************************************************************/ jb_err server_http(struct client_state *csp, char **header) { + /* Signal that we're now parsing server headers. */ + csp->flags |= CSP_FLAG_CLIENT_HEADER_PARSING_DONE; + sscanf(*header, "HTTP/%*d.%*d %d", &(csp->http->status)); if (csp->http->status == 206) { @@ -3348,10 +3414,10 @@ jb_err server_set_cookie(struct client_state *csp, char **header) tm_now = *localtime_r(&now, &tm_now); #elif FEATURE_PTHREAD pthread_mutex_lock(&localtime_mutex); - tm_now = *localtime (&now); + tm_now = *localtime (&now); pthread_mutex_unlock(&localtime_mutex); #else - tm_now = *localtime (&now); + tm_now = *localtime (&now); #endif strftime(tempbuf, BUFFER_SIZE-6, "%b %d %H:%M:%S ", &tm_now); @@ -3409,36 +3475,61 @@ jb_err server_set_cookie(struct client_state *csp, char **header) * if the cookie is still valid, if yes, * rewrite it to a session cookie. */ - if (strncmpic(cur_tag, "expires=", 8) == 0) + if ((strncmpic(cur_tag, "expires=", 8) == 0) && *(cur_tag + 8)) { char *match; + const char *expiration_date = cur_tag + 8; /* Skip "[Ee]xpires=" */ + memset(&tm_cookie, 0, sizeof(tm_cookie)); /* * Try the valid time formats we know about. * + * XXX: This should be moved to parse_header_time(). + * * XXX: Maybe the log messages should be removed * for the next stable release. They just exist to * see which time format gets the most hits and * should be checked for first. 
*/ - if (NULL != (match = strptime(cur_tag, "expires=%a, %e-%b-%y %H:%M:%S ", &tm_cookie))) + if (NULL != (match = strptime(expiration_date, "%a, %e-%b-%y %H:%M:%S ", &tm_cookie))) { + /* 22-Feb-2008 12:01:18 GMT */ log_error(LOG_LEVEL_HEADER, "cookie \'%s\' send by %s appears to be using time format 1.", *header, csp->http->url); } - else if (NULL != (match = strptime(cur_tag, "expires=%A, %e-%b-%Y %H:%M:%S ", &tm_cookie))) + else if (NULL != (match = strptime(expiration_date, "%A, %e-%b-%Y %H:%M:%S ", &tm_cookie))) { + /* Tue, 02-Jun-2037 20:00:00 GMT */ log_error(LOG_LEVEL_HEADER, "cookie \'%s\' send by %s appears to be using time format 2.", *header, csp->http->url); - } - else if (NULL != (match = strptime(cur_tag, "expires=%a, %e-%b-%Y %H:%M:%S ", &tm_cookie))) + else if (NULL != (match = strptime(expiration_date, "%a, %e-%b-%Y %H:%M:%S ", &tm_cookie))) { + /* Tuesday, 02-Jun-2037 20:00:00 GMT */ + /* + * On FreeBSD this is never reached because it's handled + * by "format 2" as well. I am, however, not sure if all + * strptime() implementations behave that way. + */ log_error(LOG_LEVEL_HEADER, "cookie \'%s\' send by %s appears to be using time format 3.", *header, csp->http->url); } + else if (NULL != (match = strptime(expiration_date, "%a, %e %b %Y %H:%M:%S ", &tm_cookie))) + { + /* Fri, 22 Feb 2008 19:20:05 GMT */ + log_error(LOG_LEVEL_HEADER, + "cookie \'%s\' send by %s appears to be using time format 4.", + *header, csp->http->url); + } + else if (NULL != (match = strptime(expiration_date, "%A %b %e %H:%M:%S %Y", &tm_cookie))) + { + /* Thu Mar 08 23:00:00 2007 GMT */ + log_error(LOG_LEVEL_HEADER, + "cookie \'%s\' send by %s appears to be using time format 5.", + *header, csp->http->url); + } /* Did any of them match? */ if (NULL == match) @@ -3449,7 +3540,7 @@ jb_err server_set_cookie(struct client_state *csp, char **header) * XXX: Should we remove the whole cookie instead? */ log_error(LOG_LEVEL_ERROR, - "Can't parse %s. 
Unsupported time format?", cur_tag); + "Can't parse \'%s\', send by %s. Unsupported time format?", cur_tag, csp->http->url); memmove(cur_tag, next_tag, strlen(next_tag) + 1); changed = 1; } @@ -3499,11 +3590,10 @@ jb_err server_set_cookie(struct client_state *csp, char **header) } else { - log_error(LOG_LEVEL_HEADER, - "Cookie \'%s\' is still valid and has to be rewritten.", *header); - /* - * Delete the tag by copying the rest of the string over it. + * Still valid, delete expiration date by copying + * the rest of the string over it. + * * (Note that we cannot just use "strcpy(cur_tag, next_tag)", * since the behaviour of strcpy is undefined for overlapping * strings.)