X-Git-Url: http://www.privoxy.org/gitweb/?p=privoxy.git;a=blobdiff_plain;f=parsers.c;h=df2a32551412093dacbabd3891ac85ac25f2a277;hp=c5f7a08708f20d703137e8b02fd54a7162ae0493;hb=bd7800c807e535d21222fa41cb29537cb865b170;hpb=686f8b0f33b78b0fe456fd53852550f73e15eb38 diff --git a/parsers.c b/parsers.c index c5f7a087..df2a3255 100644 --- a/parsers.c +++ b/parsers.c @@ -1,12 +1,11 @@ -const char parsers_rcs[] = "$Id: parsers.c,v 1.289 2014/07/25 11:55:47 fabiankeil Exp $"; /********************************************************************* * * File : $Source: /cvsroot/ijbswa/current/parsers.c,v $ * * Purpose : Declares functions to parse/crunch headers and pages. * - * Copyright : Written by and Copyright (C) 2001-2014 the - * Privoxy team. http://www.privoxy.org/ + * Copyright : Written by and Copyright (C) 2001-2017 the + * Privoxy team. https://www.privoxy.org/ * * Based on the Internet Junkbuster originally written * by and Copyright (C) 1997 Anonymous Coders and @@ -90,12 +89,11 @@ const char parsers_rcs[] = "$Id: parsers.c,v 1.289 2014/07/25 11:55:47 fabiankei #include "strptime.h" #endif -const char parsers_h_rcs[] = PARSERS_H_VERSION; - static char *get_header_line(struct iob *iob); static jb_err scan_headers(struct client_state *csp); static jb_err header_tagger(struct client_state *csp, char *header); static jb_err parse_header_time(const char *header_time, time_t *result); +static jb_err parse_time_header(const char *header, time_t *result); static jb_err crumble (struct client_state *csp, char **header); static jb_err filter_header (struct client_state *csp, char **header); @@ -251,13 +249,14 @@ static const add_header_func_ptr add_server_headers[] = { /********************************************************************* * - * Function : flush_socket + * Function : flush_iob * * Description : Write any pending "buffered" content. * * Parameters : * 1 : fd = file descriptor of the socket to read * 2 : iob = The I/O buffer to flush, usually csp->iob. + * 3 : delay = Number of milliseconds to delay the writes * * Returns : On success, the number of bytes written are returned (zero * indicates nothing was written). On error, -1 is returned, @@ -267,7 +266,7 @@ static const add_header_func_ptr add_server_headers[] = { * file, the results are not portable. * *********************************************************************/ -long flush_socket(jb_socket fd, struct iob *iob) +long flush_iob(jb_socket fd, struct iob *iob, unsigned int delay) { long len = iob->eod - iob->cur; @@ -276,7 +275,7 @@ long flush_socket(jb_socket fd, struct iob *iob) return(0); } - if (write_socket(fd, iob->cur, (size_t)len)) + if (write_socket_delayed(fd, iob->cur, (size_t)len, delay)) { return(-1); } @@ -386,7 +385,7 @@ jb_err add_to_iob(struct iob *iob, const size_t buffer_limit, char *src, long n) void clear_iob(struct iob *iob) { free(iob->buf); - memset(iob, '\0', sizeof(*iob));; + memset(iob, '\0', sizeof(*iob)); } @@ -420,8 +419,13 @@ jb_err decompress_iob(struct client_state *csp) int status; /* return status of the inflate() call */ z_stream zstr; /* used by calls to zlib */ +#ifdef FUZZ + assert(csp->iob->cur - csp->iob->buf >= 0); + assert(csp->iob->eod - csp->iob->cur >= 0); +#else assert(csp->iob->cur - csp->iob->buf > 0); assert(csp->iob->eod - csp->iob->cur > 0); +#endif bufsize = csp->iob->size; skip_size = (size_t)(csp->iob->cur - csp->iob->buf); @@ -435,7 +439,9 @@ jb_err decompress_iob(struct client_state *csp) * This is to protect the parsing of gzipped data, * but it should(?) be valid for deflated data also. */ - log_error(LOG_LEVEL_ERROR, "Buffer too small decompressing iob"); + log_error(LOG_LEVEL_ERROR, + "Insufficient data to start decompression. Bytes in buffer: %d", + csp->iob->eod - csp->iob->cur); return JB_ERR_COMPRESS; } @@ -715,7 +721,7 @@ jb_err decompress_iob(struct client_state *csp) * Make sure the new uncompressed iob obeys some minimal * consistency conditions. */ - if ((csp->iob->buf < csp->iob->cur) + if ((csp->iob->buf <= csp->iob->cur) && (csp->iob->cur <= csp->iob->eod) && (csp->iob->eod <= csp->iob->buf + csp->iob->size)) { @@ -1115,7 +1121,8 @@ static void enforce_header_order(struct list *headers, const struct list *ordere * server and header filtering. * * Returns : JB_ERR_OK in case off success, or - * JB_ERR_MEMORY on out-of-memory error. + * JB_ERR_MEMORY on some out-of-memory errors, or + * JB_ERR_PARSE in case of fatal parse errors. * *********************************************************************/ jb_err sed(struct client_state *csp, int filter_server_headers) @@ -1141,9 +1148,9 @@ jb_err sed(struct client_state *csp, int filter_server_headers) check_negative_tag_patterns(csp, PATTERN_SPEC_NO_REQUEST_TAG_PATTERN); } - while ((err == JB_ERR_OK) && (v->str != NULL)) + while (v->str != NULL) { - for (p = csp->headers->first; (err == JB_ERR_OK) && (p != NULL); p = p->next) + for (p = csp->headers->first; p != NULL; p = p->next) { /* Header crunch()ed in previous run? -> ignore */ if (p->str == NULL) continue; @@ -1153,6 +1160,10 @@ jb_err sed(struct client_state *csp, int filter_server_headers) (v->len == CHECK_EVERY_HEADER_REMAINING)) { err = v->parser(csp, &(p->str)); + if (err != JB_ERR_OK) + { + return err; + } } } v++; @@ -1174,6 +1185,46 @@ jb_err sed(struct client_state *csp, int filter_server_headers) } +#ifdef FEATURE_HTTPS_INSPECTION +/********************************************************************* + * + * Function : sed_https + * + * Description : add, delete or modify lines in the HTTPS client + * header streams. Wrapper around sed(). + * + * Parameters : + * 1 : csp = Current client state (buffers, headers, etc...) + * + * Returns : JB_ERR_OK in case off success, or + * JB_ERR_MEMORY on some out-of-memory errors, or + * JB_ERR_PARSE in case of fatal parse errors. + * + *********************************************************************/ +jb_err sed_https(struct client_state *csp) +{ + jb_err err; + struct list headers; + + /* + * Temporarly replace csp->headers with csp->https_headers + * to trick sed() into filtering the https headers. + */ + headers.first = csp->headers->first; + headers.last = csp->headers->last; + csp->headers->first = csp->https_headers->first; + csp->headers->last = csp->https_headers->last; + + err = sed(csp, FILTER_CLIENT_HEADERS); + + csp->headers->first = headers.first; + csp->headers->last = headers.last; + + return err; +} +#endif /* def FEATURE_HTTPS_INSPECTION */ + + /********************************************************************* * * Function : update_server_headers @@ -1657,6 +1708,8 @@ static jb_err server_keep_alive(struct client_state *csp, char **header) csp->flags |= CSP_FLAG_SERVER_KEEP_ALIVE_TIMEOUT_SET; } + freez(*header); + return JB_ERR_OK; } @@ -1801,7 +1854,9 @@ static jb_err client_keep_alive(struct client_state *csp, char **header) static jb_err get_content_length(const char *header_value, unsigned long long *length) { #ifdef _WIN32 - assert(sizeof(unsigned long long) > 4); +#if SIZEOF_LONG_LONG < 8 +#error sizeof(unsigned long long) too small +#endif if (1 != sscanf(header_value, "%I64u", length)) #else if (1 != sscanf(header_value, "%llu", length)) @@ -2190,11 +2245,12 @@ static jb_err server_content_type(struct client_state *csp, char **header) */ if ((csp->content_type & CT_TEXT) || (csp->action->flags & ACTION_FORCE_TEXT_MODE)) { + jb_err err; freez(*header); *header = strdup_or_die("Content-Type: "); - string_append(header, csp->action->string[ACTION_STRING_CONTENT_TYPE]); - if (header == NULL) + err = string_append(header, csp->action->string[ACTION_STRING_CONTENT_TYPE]); + if (JB_ERR_OK != err) { log_error(LOG_LEVEL_HEADER, "Insufficient memory to replace Content-Type!"); return JB_ERR_MEMORY; @@ -2316,8 +2372,7 @@ static jb_err server_content_encoding(struct client_state *csp, char **header) /* * Log a warning if the user expects the content to be filtered. */ - if ((csp->rlist != NULL) && - (!list_is_empty(csp->action->multi[ACTION_MULTI_FILTER]))) + if (content_filters_enabled(csp->action)) { log_error(LOG_LEVEL_INFO, "SDCH-compressed content detected, content filtering disabled. " @@ -2362,8 +2417,7 @@ static jb_err server_content_encoding(struct client_state *csp, char **header) /* * Log a warning if the user expects the content to be filtered. */ - if ((csp->rlist != NULL) && - (!list_is_empty(csp->action->multi[ACTION_MULTI_FILTER]))) + if (content_filters_enabled(csp->action)) { log_error(LOG_LEVEL_INFO, "Compressed content detected, content filtering disabled. " @@ -2661,13 +2715,12 @@ static jb_err server_last_modified(struct client_state *csp, char **header) } else if (0 == strcmpic(newval, "randomize")) { - const char *header_time = *header + sizeof("Last-Modified:"); - log_error(LOG_LEVEL_HEADER, "Randomizing: %s", *header); - if (JB_ERR_OK != parse_header_time(header_time, &last_modified)) + if (JB_ERR_OK != parse_time_header(*header, &last_modified)) { - log_error(LOG_LEVEL_HEADER, "Couldn't parse: %s in %s (crunching!)", header_time, *header); + log_error(LOG_LEVEL_HEADER, + "Couldn't parse time in %s (crunching!)", *header); freez(*header); } else @@ -3278,6 +3331,13 @@ static jb_err client_host(struct client_state *csp, char **header) { char *p, *q; + if (strlen(*header) < 7) + { + log_error(LOG_LEVEL_HEADER, "Removing empty Host header"); + freez(*header); + return JB_ERR_OK; + } + if (!csp->http->hostport || (*csp->http->hostport == '*') || *csp->http->hostport == ' ' || *csp->http->hostport == '\0') { @@ -3364,11 +3424,10 @@ static jb_err client_if_modified_since(struct client_state *csp, char **header) } else /* add random value */ { - const char *header_time = *header + sizeof("If-Modified-Since:"); - - if (JB_ERR_OK != parse_header_time(header_time, &tm)) + if (JB_ERR_OK != parse_time_header(*header, &tm)) { - log_error(LOG_LEVEL_HEADER, "Couldn't parse: %s in %s (crunching!)", header_time, *header); + log_error(LOG_LEVEL_HEADER, + "Couldn't parse time in %s (crunching!)", *header); freez(*header); } else @@ -3592,9 +3651,8 @@ static jb_err client_host_adder(struct client_state *csp) if (!csp->http->hostport || !*(csp->http->hostport)) { - /* XXX: When does this happen and why is it OK? */ - log_error(LOG_LEVEL_INFO, "Weirdness in client_host_adder detected and ignored."); - return JB_ERR_OK; + log_error(LOG_LEVEL_ERROR, "Destination host unknown."); + return JB_ERR_PARSE; } /* @@ -3787,7 +3845,8 @@ static jb_err server_proxy_connection_adder(struct client_state *csp) * Function : client_connection_header_adder * * Description : Adds a proper "Connection:" header to csp->headers - * unless the header was already present. Called from `sed'. + * unless the header was already present or it's a + * CONNECT request. Called from `sed'. * * Parameters : * 1 : csp = Current client state (buffers, headers, etc...) @@ -3806,10 +3865,20 @@ static jb_err client_connection_header_adder(struct client_state *csp) return JB_ERR_OK; } + /* + * In case of CONNECT requests "Connection: close" is implied, + * but actually setting the header has been reported to cause + * problems with some forwarding proxies that close the + * connection prematurely. + */ + if (csp->http->ssl != 0) + { + return JB_ERR_OK; + } + #ifdef FEATURE_CONNECTION_KEEP_ALIVE if ((csp->config->feature_flags & RUNTIME_FEATURE_CONNECTION_KEEP_ALIVE) && !(csp->flags & CSP_FLAG_SERVER_SOCKET_TAINTED) - && (csp->http->ssl == 0) && !strcmpic(csp->http->ver, "HTTP/1.1")) { csp->flags |= CSP_FLAG_CLIENT_CONNECTION_KEEP_ALIVE; @@ -3832,6 +3901,7 @@ static jb_err client_connection_header_adder(struct client_state *csp) * is a partial range (HTTP status 206) * - Rewrite HTTP/1.1 answers to HTTP/1.0 if +downgrade * action applies. + * - Normalize the HTTP-version. * * Parameters : * 1 : csp = Current client state (buffers, headers, etc...) @@ -3841,36 +3911,84 @@ static jb_err client_connection_header_adder(struct client_state *csp) * original string if necessary. * * Returns : JB_ERR_OK on success, or - * JB_ERR_MEMORY on out-of-memory error. + * JB_ERR_PARSE on fatal parse errors. * *********************************************************************/ static jb_err server_http(struct client_state *csp, char **header) { - sscanf(*header, "HTTP/%*d.%*d %d", &(csp->http->status)); + char *reason_phrase = NULL; + char *new_response_line; + char *p; + size_t length; + unsigned int major_version; + unsigned int minor_version; + + /* Get the reason phrase which start after the second whitespace */ + p = strchr(*header, ' '); + if (NULL != p) + { + p++; + reason_phrase = strchr(p, ' '); + } + + if (reason_phrase != NULL) + { + reason_phrase++; + } + else + { + log_error(LOG_LEVEL_ERROR, + "Response line lacks reason phrase: %s", *header); + reason_phrase=""; + } + + if (3 != sscanf(*header, "HTTP/%u.%u %d", &major_version, + &minor_version, &(csp->http->status))) + { + log_error(LOG_LEVEL_ERROR, + "Failed to parse the response line: %s", *header); + return JB_ERR_PARSE; + } + if (csp->http->status == 206) { csp->content_type = CT_TABOO; } - if ((csp->action->flags & ACTION_DOWNGRADE) != 0) + if (major_version != 1 || (minor_version != 0 && minor_version != 1)) { - /* XXX: Should we do a real validity check here? */ - if (strlen(*header) > 8) - { - (*header)[7] = '0'; - log_error(LOG_LEVEL_HEADER, "Downgraded answer to HTTP/1.0"); - } - else - { - /* - * XXX: Should we block the request or - * enlist a valid status code line here? - */ - log_error(LOG_LEVEL_INFO, "Malformed server response detected. " - "Downgrading to HTTP/1.0 impossible."); - } + /* + * According to RFC 7230 2.6 intermediaries MUST send + * their own HTTP-version in forwarded messages. + */ + log_error(LOG_LEVEL_ERROR, + "Unsupported HTTP version. Downgrading to 1.1."); + major_version = 1; + minor_version = 1; } + if (((csp->action->flags & ACTION_DOWNGRADE) != 0) && (minor_version == 1)) + { + log_error(LOG_LEVEL_HEADER, "Downgrading answer to HTTP/1.0"); + minor_version = 0; + } + + /* Rebuild response line. */ + length = sizeof("HTTP/1.1 200 ") + strlen(reason_phrase) + 1; + new_response_line = malloc_or_die(length); + + snprintf(new_response_line, length, "HTTP/%u.%u %d %s", + major_version, minor_version, csp->http->status, reason_phrase); + + if (0 != strcmp(*header, new_response_line)) + { + log_error(LOG_LEVEL_HEADER, "Response line '%s' changed to '%s'", + *header, new_response_line); + } + + freez(*header); + *header = new_response_line; + return JB_ERR_OK; } @@ -4251,7 +4369,13 @@ static jb_err parse_header_time(const char *header_time, time_t *result) time_t result2; tm = gmtime(result); - strftime(recreated_date, sizeof(recreated_date), time_formats[i], tm); + if (!strftime(recreated_date, sizeof(recreated_date), + time_formats[i], tm)) + { + log_error(LOG_LEVEL_ERROR, "Failed to recreate date '%s' with '%s'.", + header_time, time_formats[i]); + continue; + } memset(&gmt, 0, sizeof(gmt)); if (NULL == strptime(recreated_date, time_formats[i], &gmt)) { @@ -4279,6 +4403,44 @@ static jb_err parse_header_time(const char *header_time, time_t *result) } +/********************************************************************* + * + * Function : parse_time_header + * + * Description : Parses the time in an HTTP time header to get + * the numerical respresentation. + * + * Parameters : + * 1 : header = HTTP header with a time value + * 2 : result = storage for header_time in seconds + * + * Returns : JB_ERR_OK if the time format was recognized, or + * JB_ERR_PARSE otherwise. + * + *********************************************************************/ +static jb_err parse_time_header(const char *header, time_t *result) +{ + const char *header_time; + + header_time = strchr(header, ':'); + + /* + * Currently this can't happen as all callers are called + * through sed() which requires a header name followed by + * a colon. + */ + assert(header_time != NULL); + + header_time++; + if (*header_time == ' ') + { + header_time++; + } + + return parse_header_time(header_time, result); + +} + /********************************************************************* * @@ -4332,12 +4494,12 @@ jb_err get_destination_from_headers(const struct list *headers, struct http_requ } else { - http->port = http->ssl ? 443 : 80; + http->port = 80; } /* Rebuild request URL */ freez(http->url); - http->url = strdup(http->ssl ? "https://" : "http://"); + http->url = strdup("http://"); string_append(&http->url, http->hostport); string_append(&http->url, http->path); if (http->url == NULL) @@ -4345,14 +4507,113 @@ jb_err get_destination_from_headers(const struct list *headers, struct http_requ return JB_ERR_MEMORY; } - log_error(LOG_LEVEL_HEADER, "Destination extracted from \"Host:\" header. New request URL: %s", + log_error(LOG_LEVEL_HEADER, + "Destination extracted from \"Host\" header. New request URL: %s", http->url); + /* + * Regenerate request line in "proxy format" + * to make rewrites more convenient. + */ + assert(http->cmd != NULL); + freez(http->cmd); + http->cmd = strdup_or_die(http->gpc); + string_append(&http->cmd, " "); + string_append(&http->cmd, http->url); + string_append(&http->cmd, " "); + string_append(&http->cmd, http->ver); + if (http->cmd == NULL) + { + return JB_ERR_MEMORY; + } + return JB_ERR_OK; } +#ifdef FEATURE_HTTPS_INSPECTION +/********************************************************************* + * + * Function : get_destination_from_https_headers + * + * Description : Parse the previously encrypted "Host:" header to + * get the request's destination. + * + * Parameters : + * 1 : headers = List of headers (one of them hopefully being + * the "Host:" header) + * 2 : http = storage for the result (host, port and hostport). + * + * Returns : JB_ERR_MEMORY (or terminates) in case of memory problems, + * JB_ERR_PARSE if the host header couldn't be found, + * JB_ERR_OK otherwise. + * + *********************************************************************/ +jb_err get_destination_from_https_headers(const struct list *headers, struct http_request *http) +{ + char *q; + char *p; + char *host; + + host = get_header_value(headers, "Host:"); + + if (NULL == host) + { + log_error(LOG_LEVEL_ERROR, "No \"Host:\" header found."); + return JB_ERR_PARSE; + } + + p = strdup_or_die(host); + chomp(p); + q = strdup_or_die(p); + + freez(http->hostport); + http->hostport = p; + freez(http->host); + http->host = q; + q = strchr(http->host, ':'); + if (q != NULL) + { + /* Terminate hostname and evaluate port string */ + *q++ = '\0'; + http->port = atoi(q); + } + else + { + http->port = 443; + } + + /* Rebuild request URL */ + freez(http->url); + http->url = strdup_or_die(http->path); + + log_error(LOG_LEVEL_HEADER, + "Destination extracted from \"Host\" header. New request URL: %s", + http->url); + + /* + * Regenerate request line in "proxy format" + * to make rewrites more convenient. + */ + assert(http->cmd != NULL); + freez(http->cmd); + http->cmd = strdup_or_die(http->gpc); + string_append(&http->cmd, " "); + string_append(&http->cmd, http->url); + string_append(&http->cmd, " "); + string_append(&http->cmd, http->ver); + if (http->cmd == NULL) + { + return JB_ERR_MEMORY; + } + + return JB_ERR_OK; + +} +#endif /* def FEATURE_HTTPS_INSPECTION */ + + /********************************************************************* * * Function : create_forged_referrer @@ -4503,7 +4764,14 @@ static jb_err handle_conditional_hide_referrer_parameter(char **header, static void create_content_length_header(unsigned long long content_length, char *header, size_t buffer_length) { +#ifdef _WIN32 +#if SIZEOF_LONG_LONG < 8 +#error sizeof(unsigned long long) too small +#endif + snprintf(header, buffer_length, "Content-Length: %I64u", content_length); +#else snprintf(header, buffer_length, "Content-Length: %llu", content_length); +#endif }