X-Git-Url: http://www.privoxy.org/gitweb/?a=blobdiff_plain;f=parsers.c;h=2650fbb14ab1b4bee5dcabf202aa7af955b4e840;hb=0b26fdd355d5d179b25e7a8da32193aee55ac331;hp=940f45cd4c52a1a59ebc52b331aa3092d6531fb3;hpb=1874e12a0343cc86935d5ce3b544b4b32359d703;p=privoxy.git diff --git a/parsers.c b/parsers.c index 940f45cd..2650fbb1 100644 --- a/parsers.c +++ b/parsers.c @@ -1,4 +1,4 @@ -const char parsers_rcs[] = "$Id: parsers.c,v 1.218 2011/02/14 16:11:34 fabiankeil Exp $"; +const char parsers_rcs[] = "$Id: parsers.c,v 1.232 2011/09/04 11:33:50 fabiankeil Exp $"; /********************************************************************* * * File : $Source: /cvsroot/ijbswa/current/parsers.c,v $ @@ -152,6 +152,9 @@ static jb_err server_http (struct client_state *csp, char **header static jb_err crunch_server_header (struct client_state *csp, char **header); static jb_err server_last_modified (struct client_state *csp, char **header); static jb_err server_content_disposition(struct client_state *csp, char **header); +#ifdef FEATURE_ZLIB +static jb_err server_adjust_content_encoding(struct client_state *csp, char **header); +#endif #ifdef FEATURE_CONNECTION_KEEP_ALIVE static jb_err server_save_content_length(struct client_state *csp, char **header); @@ -184,10 +187,10 @@ struct parsers { /** The header prefix to match */ const char *str; - + /** The length of the prefix to match */ const size_t len; - + /** The function to apply to this line */ const parser_func_ptr parser; }; @@ -347,7 +350,7 @@ jb_err add_to_iob(struct client_state *csp, char *buf, long n) { want *= 2; } - + if (want <= csp->config->buffer_limit && NULL != (p = (char *)realloc(iob->buf, want))) { iob->size = want; @@ -403,7 +406,7 @@ jb_err add_to_iob(struct client_state *csp, char *buf, long n) jb_err decompress_iob(struct client_state *csp) { char *buf; /* new, uncompressed buffer */ - char *cur; /* Current iob position (to keep the original + char *cur; /* Current iob position (to keep the original * iob->cur unmodified if we return early) */ size_t bufsize; /* allocated size of the new buffer */ size_t old_size; /* Content size before decompression */ @@ -551,7 +554,7 @@ jb_err decompress_iob(struct client_state *csp) * * Fortunately, add_to_iob() has thoughtfully null-terminated * the buffer; we can just increment the end pointer to include - * the dummy byte. + * the dummy byte. */ csp->iob->eod++; } @@ -616,10 +619,12 @@ jb_err decompress_iob(struct client_state *csp) } /* - * If we tried the limit and still didn't have enough - * memory, just give up. + * If we reached the buffer limit and still didn't have enough + * memory, just give up. Due to the ceiling enforced by the next + * if block we could actually check for equality here, but as it + * can be easily mistaken for a bug we don't. */ - if (bufsize == csp->config->buffer_limit) + if (bufsize >= csp->config->buffer_limit) { log_error(LOG_LEVEL_ERROR, "Buffer limit reached while decompressing iob"); return JB_ERR_MEMORY; @@ -633,7 +638,7 @@ jb_err decompress_iob(struct client_state *csp) { bufsize = csp->config->buffer_limit; } - + /* Try to allocate the new buffer. */ tmpbuf = realloc(buf, bufsize); if (NULL == tmpbuf) @@ -705,7 +710,7 @@ jb_err decompress_iob(struct client_state *csp) csp->iob->cur = csp->iob->buf + skip_size; csp->iob->eod = (char *)zstr.next_out; csp->iob->size = bufsize; - + /* * Make sure the new uncompressed iob obeys some minimal * consistency conditions. @@ -877,7 +882,7 @@ char *get_header(struct iob *iob) * Header spans multiple lines, append the next one. */ char *continued_header; - + continued_header = get_header_line(iob); if ((continued_header == NULL) || (*continued_header == '\0')) { @@ -1009,7 +1014,7 @@ char *get_header_value(const struct list *header_list, const char *header_name) } } - /* + /* * Not found */ return NULL; @@ -1021,7 +1026,7 @@ char *get_header_value(const struct list *header_list, const char *header_name) * * Function : scan_headers * - * Description : Scans headers, applies tags and updates action bits. + * Description : Scans headers, applies tags and updates action bits. * * Parameters : * 1 : csp = Current client state (buffers, headers, etc...) @@ -1137,7 +1142,7 @@ jb_err update_server_headers(struct client_state *csp) { "Content-Length:", 15, server_adjust_content_length }, { "Transfer-Encoding:", 18, server_transfer_coding }, #ifdef FEATURE_ZLIB - { "Content-Encoding:", 17, server_content_encoding }, + { "Content-Encoding:", 17, server_adjust_content_encoding }, #endif /* def FEATURE_ZLIB */ { NULL, 0, NULL } }; @@ -1182,6 +1187,18 @@ jb_err update_server_headers(struct client_state *csp) } #endif /* def FEATURE_CONNECTION_KEEP_ALIVE */ +#ifdef FEATURE_COMPRESSION + if ((JB_ERR_OK == err) + && (csp->flags & CSP_FLAG_BUFFERED_CONTENT_DEFLATED)) + { + err = enlist_unique_header(csp->headers, "Content-Encoding", "deflate"); + if (JB_ERR_OK == err) + { + log_error(LOG_LEVEL_HEADER, "Added header: Content-Encoding: deflate"); + } + } +#endif + return err; } @@ -1327,7 +1344,7 @@ static jb_err header_tagger(struct client_state *csp, char *header) b->name); continue; } - + if (!list_contains_item(csp->tags, tag)) { if (JB_ERR_OK != enlist(csp->tags, tag)) @@ -1491,7 +1508,7 @@ static jb_err filter_header(struct client_state *csp, char **header) matches = pcrs_execute(job, *header, size, &newheader, &size); if ( 0 < matches ) { - current_hits += matches; + current_hits += matches; log_error(LOG_LEVEL_HEADER, "Transforming \"%s\" to \"%s\"", *header, newheader); freez(*header); *header = newheader; @@ -1805,7 +1822,7 @@ static jb_err client_save_content_length(struct client_state *csp, char **header * Function : client_connection * * Description : Makes sure a proper "Connection:" header is - * set and signals connection_header_adder + * set and signals connection_header_adder * to do nothing. * * Parameters : @@ -1942,7 +1959,7 @@ static jb_err crunch_server_header(struct client_state *csp, char **header) /* Is the current header the lucky one? */ if (strstr(*header, crunch_pattern)) { - log_error(LOG_LEVEL_HEADER, "Crunching server header: %s (contains: %s)", *header, crunch_pattern); + log_error(LOG_LEVEL_HEADER, "Crunching server header: %s (contains: %s)", *header, crunch_pattern); freez(*header); } } @@ -2022,8 +2039,8 @@ static jb_err server_content_type(struct client_state *csp, char **header) if (csp->action->flags & ACTION_CONTENT_TYPE_OVERWRITE) { /* - * Make sure the user doesn't accidently - * change the content type of binary documents. + * Make sure the user doesn't accidentally + * change the content type of binary documents. */ if ((csp->content_type & CT_TEXT) || (csp->action->flags & ACTION_FORCE_TEXT_MODE)) { @@ -2116,22 +2133,16 @@ static jb_err server_transfer_coding(struct client_state *csp, char **header) * * Function : server_content_encoding * - * Description : This function is run twice for each request, - * unless FEATURE_ZLIB and filtering are disabled. + * Description : Used to check if the content is compressed, and if + * FEATURE_ZLIB is disabled, filtering is disabled as + * well. * - * The first run is used to check if the content - * is compressed, if FEATURE_ZLIB is disabled - * filtering is then disabled as well, if FEATURE_ZLIB - * is enabled the content is marked for decompression. - * - * The second run is used to remove the Content-Encoding - * header if the decompression was successful. + * If FEATURE_ZLIB is enabled and the compression type + * supported, the content is marked for decompression. * * XXX: Doesn't properly deal with multiple or with * unsupported but unknown encodings. * Is case-sensitive but shouldn't be. - * The second run should be factored out into - * a different function. * * Parameters : * 1 : csp = Current client state (buffers, headers, etc...) @@ -2147,21 +2158,7 @@ static jb_err server_transfer_coding(struct client_state *csp, char **header) static jb_err server_content_encoding(struct client_state *csp, char **header) { #ifdef FEATURE_ZLIB - if ((csp->flags & CSP_FLAG_MODIFIED) - && (csp->content_type & (CT_GZIP | CT_DEFLATE))) - { - /* - * We successfully decompressed the content, - * and have to clean the header now, so the - * client no longer expects compressed data.. - * - * XXX: There is a difference between cleaning - * and removing it completely. - */ - log_error(LOG_LEVEL_HEADER, "Crunching: %s", *header); - freez(*header); - } - else if (strstr(*header, "sdch")) + if (strstr(*header, "sdch")) { /* * Shared Dictionary Compression over HTTP isn't supported, @@ -2235,6 +2232,49 @@ static jb_err server_content_encoding(struct client_state *csp, char **header) } +#ifdef FEATURE_ZLIB +/********************************************************************* + * + * Function : server_adjust_content_encoding + * + * Description : Remove the Content-Encoding header if the + * decompression was successful and the content + * has been modifed. + * + * Parameters : + * 1 : csp = Current client state (buffers, headers, etc...) + * 2 : header = On input, pointer to header to modify. + * On output, pointer to the modified header, or NULL + * to remove the header. This function frees the + * original string if necessary. + * + * Returns : JB_ERR_OK on success, or + * JB_ERR_MEMORY on out-of-memory error. + * + *********************************************************************/ +static jb_err server_adjust_content_encoding(struct client_state *csp, char **header) +{ + if ((csp->flags & CSP_FLAG_MODIFIED) + && (csp->content_type & (CT_GZIP | CT_DEFLATE))) + { + /* + * We successfully decompressed the content, + * and have to clean the header now, so the + * client no longer expects compressed data. + * + * XXX: There is a difference between cleaning + * and removing it completely. + */ + log_error(LOG_LEVEL_HEADER, "Crunching: %s", *header); + freez(*header); + } + + return JB_ERR_OK; + +} +#endif /* defined(FEATURE_ZLIB) */ + + /********************************************************************* * * Function : server_adjust_content_length @@ -2388,7 +2428,7 @@ static jb_err server_content_disposition(struct client_state *csp, char **header return JB_ERR_OK; } else - { + { /* * Replacing Content-Disposition header */ @@ -2453,18 +2493,18 @@ static jb_err server_last_modified(struct client_state *csp, char **header) return JB_ERR_OK; } else if (0 == strcmpic(newval, "reset-to-request-time")) - { + { /* * Setting Last-Modified Header to now. */ get_http_time(0, buf, sizeof(buf)); freez(*header); *header = strdup("Last-Modified: "); - string_append(header, buf); + string_append(header, buf); if (*header == NULL) { - log_error(LOG_LEVEL_HEADER, "Insufficient memory. Last-Modified header got lost, boohoo."); + log_error(LOG_LEVEL_HEADER, "Insufficient memory. Last-Modified header got lost, boohoo."); } else { @@ -2499,7 +2539,7 @@ static jb_err server_last_modified(struct client_state *csp, char **header) if (negative_delta) { - rtime *= -1; + rtime *= -1; log_error(LOG_LEVEL_HEADER, "Server time in the future."); } rtime = pick_from_range(rtime); @@ -2534,7 +2574,7 @@ static jb_err server_last_modified(struct client_state *csp, char **header) if (*header == NULL) { log_error(LOG_LEVEL_ERROR, "Insufficient memory, header crunched without replacement."); - return JB_ERR_MEMORY; + return JB_ERR_MEMORY; } days = rtime / (3600 * 24); @@ -2579,6 +2619,13 @@ static jb_err server_last_modified(struct client_state *csp, char **header) *********************************************************************/ static jb_err client_accept_encoding(struct client_state *csp, char **header) { +#ifdef FEATURE_COMPRESSION + if ((csp->config->feature_flags & RUNTIME_FEATURE_COMPRESSION) + && strstr(*header, "deflate")) + { + csp->flags |= CSP_FLAG_CLIENT_SUPPORTS_DEFLATE; + } +#endif if ((csp->action->flags & ACTION_NO_COMPRESSION) != 0) { log_error(LOG_LEVEL_HEADER, "Suppressed offer to compress content"); @@ -2643,7 +2690,7 @@ static jb_err client_referrer(struct client_state *csp, char **header) /* booleans for parameters we have to check multiple times */ int parameter_conditional_block; int parameter_conditional_forge; - + #ifdef FEATURE_FORCE_LOAD /* * Since the referrer can include the prefix even @@ -2741,18 +2788,18 @@ static jb_err client_accept_language(struct client_state *csp, char **header) return JB_ERR_OK; } else - { + { /* * Replacing Accept-Language header */ freez(*header); *header = strdup("Accept-Language: "); - string_append(header, newval); + string_append(header, newval); if (*header == NULL) { log_error(LOG_LEVEL_ERROR, - "Insufficient memory. Accept-Language header crunched without replacement."); + "Insufficient memory. Accept-Language header crunched without replacement."); } else { @@ -2793,7 +2840,7 @@ static jb_err crunch_client_header(struct client_state *csp, char **header) /* Is the current header the lucky one? */ if (strstr(*header, crunch_pattern)) { - log_error(LOG_LEVEL_HEADER, "Crunching client header: %s (contains: %s)", *header, crunch_pattern); + log_error(LOG_LEVEL_HEADER, "Crunching client header: %s (contains: %s)", *header, crunch_pattern); freez(*header); } } @@ -3098,10 +3145,10 @@ static jb_err client_host(struct client_state *csp, char **header) return JB_ERR_OK; } - if (!csp->http->hostport || (*csp->http->hostport == '*') || + if (!csp->http->hostport || (*csp->http->hostport == '*') || *csp->http->hostport == ' ' || *csp->http->hostport == '\0') { - + if (NULL == (p = strdup((*header)+6))) { return JB_ERR_MEMORY; @@ -3164,13 +3211,13 @@ static jb_err client_if_modified_since(struct client_state *csp, char **header) struct tm gmt; #endif struct tm *timeptr = NULL; - time_t tm = 0; + time_t tm = 0; const char *newval; char * endptr; - + if ( 0 == strcmpic(*header, "If-Modified-Since: Wed, 08 Jun 1955 12:00:00 GMT")) { - /* + /* * The client got an error message because of a temporary problem, * the problem is gone and the client now tries to revalidate our * error message on the real server. The revalidation would always @@ -3210,7 +3257,7 @@ static jb_err client_if_modified_since(struct client_state *csp, char **header) *header, rtime, (rtime == 1 || rtime == -1) ? "e": "es"); if (negative_range) { - rtime *= -1; + rtime *= -1; } rtime *= 60; rtime = pick_from_range(rtime); @@ -3247,7 +3294,7 @@ static jb_err client_if_modified_since(struct client_state *csp, char **header) if (*header == NULL) { log_error(LOG_LEVEL_HEADER, "Insufficient memory, header crunched without replacement."); - return JB_ERR_MEMORY; + return JB_ERR_MEMORY; } hours = rtime / 3600; @@ -3287,7 +3334,7 @@ static jb_err client_if_modified_since(struct client_state *csp, char **header) static jb_err client_if_none_match(struct client_state *csp, char **header) { if (csp->action->flags & ACTION_CRUNCH_IF_NONE_MATCH) - { + { log_error(LOG_LEVEL_HEADER, "Crunching %s", *header); freez(*header); } @@ -3329,7 +3376,7 @@ jb_err client_x_filter(struct client_state *csp, char **header) "force-text-mode overruled the client's request to fetch without filtering!"); } else - { + { csp->content_type = CT_TABOO; /* XXX: This hack shouldn't be necessary */ csp->flags |= CSP_FLAG_NO_FILTERING; log_error(LOG_LEVEL_HEADER, "Accepted the client's request to fetch without filtering."); @@ -3338,7 +3385,7 @@ jb_err client_x_filter(struct client_state *csp, char **header) freez(*header); } } - return JB_ERR_OK; + return JB_ERR_OK; } @@ -3372,7 +3419,7 @@ static jb_err client_range(struct client_state *csp, char **header) freez(*header); } - return JB_ERR_OK; + return JB_ERR_OK; } /* the following functions add headers directly to the header list */ @@ -3543,7 +3590,7 @@ static jb_err client_x_forwarded_for_adder(struct client_state *csp) * * Function : server_connection_adder * - * Description : Adds an appropiate "Connection:" header to csp->headers + * Description : Adds an appropriate "Connection:" header to csp->headers * unless the header was already present. Called from `sed'. * * Parameters : @@ -3596,7 +3643,7 @@ static jb_err server_connection_adder(struct client_state *csp) * * Description : Adds a "Proxy-Connection: keep-alive" header to * csp->headers if the client asked for keep-alive. - * XXX: We should reuse existant ones. + * XXX: We should reuse existent ones. * * Parameters : * 1 : csp = Current client state (buffers, headers, etc...) @@ -3741,7 +3788,7 @@ static jb_err server_http(struct client_state *csp, char **header) static jb_err server_set_cookie(struct client_state *csp, char **header) { time_t now; - time_t cookie_time; + time_t cookie_time; time(&now); @@ -3908,7 +3955,7 @@ static jb_err server_set_cookie(struct client_state *csp, char **header) * * Function : strclean * - * Description : In-Situ-Eliminate all occurances of substring in + * Description : In-Situ-Eliminate all occurrences of substring in * string * * Parameters : @@ -3951,7 +3998,7 @@ int strclean(char *string, const char *substring) * to get the numerical respresentation. * * Parameters : - * 1 : header_time = HTTP header time as string. + * 1 : header_time = HTTP header time as string. * 2 : result = storage for header_time in seconds * * Returns : JB_ERR_OK if the time format was recognized, or @@ -3961,34 +4008,49 @@ int strclean(char *string, const char *substring) static jb_err parse_header_time(const char *header_time, time_t *result) { struct tm gmt; - /* - * Zero out gmt to prevent time zone offsets. - * - * While this is only necessary on some platforms - * (mingw32 for example), I don't know how to - * detect these automatically and doing it everywhere - * shouldn't hurt. + * Checking for two-digit years first in an + * attempt to work around GNU libc's strptime() + * reporting negative year values when using %Y. */ - memset(&gmt, 0, sizeof(gmt)); - - /* Tue, 02 Jun 2037 20:00:00 */ - if ((NULL == strptime(header_time, "%a, %d %b %Y %H:%M:%S", &gmt)) - /* Tue, 02-Jun-2037 20:00:00 */ - && (NULL == strptime(header_time, "%a, %d-%b-%Y %H:%M:%S", &gmt)) - /* Tue, 02-Jun-37 20:00:00 */ - && (NULL == strptime(header_time, "%a, %d-%b-%y %H:%M:%S", &gmt)) - /* Tuesday, 02-Jun-2037 20:00:00 */ - && (NULL == strptime(header_time, "%A, %d-%b-%Y %H:%M:%S", &gmt)) - /* Tuesday Jun 02 20:00:00 2037 */ - && (NULL == strptime(header_time, "%A %b %d %H:%M:%S %Y", &gmt))) + static const char * const time_formats[] = { + /* Tue, 02-Jun-37 20:00:00 */ + "%a, %d-%b-%y %H:%M:%S", + /* Tue, 02 Jun 2037 20:00:00 */ + "%a, %d %b %Y %H:%M:%S", + /* Tue, 02-Jun-2037 20:00:00 */ + "%a, %d-%b-%Y %H:%M:%S", + /* Tuesday, 02-Jun-2037 20:00:00 */ + "%A, %d-%b-%Y %H:%M:%S", + /* Tuesday Jun 02 20:00:00 2037 */ + "%A %b %d %H:%M:%S %Y" + }; + unsigned int i; + + for (i = 0; i < SZ(time_formats); i++) { - return JB_ERR_PARSE; - } + /* + * Zero out gmt to prevent time zone offsets. + * Documented to be required for GNU libc. + */ + memset(&gmt, 0, sizeof(gmt)); - *result = timegm(&gmt); + if (NULL != strptime(header_time, time_formats[i], &gmt)) + { + /* Sanity check for GNU libc. */ + if (gmt.tm_year < 0) + { + log_error(LOG_LEVEL_HEADER, + "Failed to parse '%s' using '%s'. Moving on.", + header_time, time_formats[i]); + continue; + } + *result = timegm(&gmt); + return JB_ERR_OK; + } + } - return JB_ERR_OK; + return JB_ERR_PARSE; } @@ -4007,7 +4069,7 @@ static jb_err parse_header_time(const char *header_time, time_t *result) * Parameters : * 1 : headers = List of headers (one of them hopefully being * the "Host:" header) - * 2 : http = storage for the result (host, port and hostport). + * 2 : http = storage for the result (host, port and hostport). * * Returns : JB_ERR_MEMORY in case of memory problems, * JB_ERR_PARSE if the host header couldn't be found, @@ -4028,7 +4090,8 @@ jb_err get_destination_from_headers(const struct list *headers, struct http_requ return JB_ERR_PARSE; } - if (NULL == (p = strdup((host)))) + p = strdup(host); + if (NULL == p) { log_error(LOG_LEVEL_ERROR, "Out of memory while parsing \"Host:\" header"); return JB_ERR_MEMORY; @@ -4081,7 +4144,7 @@ jb_err get_destination_from_headers(const struct list *headers, struct http_requ * * Description : Helper for client_referrer to forge a referer as * 'http://[hostname:port/' to fool stupid - * checks for in-site links + * checks for in-site links * * Parameters : * 1 : header = Pointer to header pointer