X-Git-Url: http://www.privoxy.org/gitweb/?p=privoxy.git;a=blobdiff_plain;f=parsers.c;h=45b4c3bf283e7d8a272060bacf98e4b364aa99a1;hp=86d7b7934f6dcc060bc0295b4083d7b9b8e110a5;hb=5c2780877ebeda989fa35631c3064e4e100ecdaa;hpb=4b70ff5ebf195bae5f3398957ddd6d948b435e44 diff --git a/parsers.c b/parsers.c index 86d7b793..45b4c3bf 100644 --- a/parsers.c +++ b/parsers.c @@ -1,12 +1,11 @@ -const char parsers_rcs[] = "$Id: parsers.c,v 1.272 2012/12/09 12:27:01 fabiankeil Exp $"; /********************************************************************* * * File : $Source: /cvsroot/ijbswa/current/parsers.c,v $ * * Purpose : Declares functions to parse/crunch headers and pages. * - * Copyright : Written by and Copyright (C) 2001-2012 the - * Privoxy team. http://www.privoxy.org/ + * Copyright : Written by and Copyright (C) 2001-2020 the + * Privoxy team. https://www.privoxy.org/ * * Based on the Internet Junkbuster originally written * by and Copyright (C) 1997 Anonymous Coders and @@ -90,12 +89,11 @@ const char parsers_rcs[] = "$Id: parsers.c,v 1.272 2012/12/09 12:27:01 fabiankei #include "strptime.h" #endif -const char parsers_h_rcs[] = PARSERS_H_VERSION; - static char *get_header_line(struct iob *iob); static jb_err scan_headers(struct client_state *csp); static jb_err header_tagger(struct client_state *csp, char *header); static jb_err parse_header_time(const char *header_time, time_t *result); +static jb_err parse_time_header(const char *header, time_t *result); static jb_err crumble (struct client_state *csp, char **header); static jb_err filter_header (struct client_state *csp, char **header); @@ -116,6 +114,7 @@ static jb_err client_if_none_match (struct client_state *csp, char **header static jb_err crunch_client_header (struct client_state *csp, char **header); static jb_err client_x_filter (struct client_state *csp, char **header); static jb_err client_range (struct client_state *csp, char **header); +static jb_err client_expect (struct client_state *csp, char **header); 
static jb_err server_set_cookie (struct client_state *csp, char **header); static jb_err server_connection (struct client_state *csp, char **header); static jb_err server_content_type (struct client_state *csp, char **header); @@ -148,6 +147,7 @@ static jb_err server_connection_adder(struct client_state *csp); #ifdef FEATURE_CONNECTION_KEEP_ALIVE static jb_err server_proxy_connection_adder(struct client_state *csp); #endif /* def FEATURE_CONNECTION_KEEP_ALIVE */ +static jb_err proxy_authentication(struct client_state *csp, char **header); static jb_err create_forged_referrer(char **header, const char *hostport); static jb_err create_fake_referrer(char **header, const char *fake_referrer); @@ -198,9 +198,11 @@ static const struct parsers client_patterns[] = { { "Request-Range:", 14, client_range }, { "If-Range:", 9, client_range }, { "X-Filter:", 9, client_x_filter }, + { "Proxy-Authorization:", 20, proxy_authentication }, #if 0 { "Transfer-Encoding:", 18, client_transfer_encoding }, #endif + { "Expect:", 7, client_expect }, { "*", 0, crunch_client_header }, { "*", 0, filter_header }, { NULL, 0, NULL } @@ -223,6 +225,7 @@ static const struct parsers server_patterns[] = { { "Transfer-Encoding:", 18, server_transfer_coding }, { "content-disposition:", 20, server_content_disposition }, { "Last-Modified:", 14, server_last_modified }, + { "Proxy-Authenticate:", 19, proxy_authentication }, { "*", 0, crunch_server_header }, { "*", 0, filter_header }, { NULL, 0, NULL } @@ -246,13 +249,14 @@ static const add_header_func_ptr add_server_headers[] = { /********************************************************************* * - * Function : flush_socket + * Function : flush_iob * * Description : Write any pending "buffered" content. * * Parameters : * 1 : fd = file descriptor of the socket to read * 2 : iob = The I/O buffer to flush, usually csp->iob. 
+ * 3 : delay = Number of milliseconds to delay the writes * * Returns : On success, the number of bytes written are returned (zero * indicates nothing was written). On error, -1 is returned, @@ -262,7 +266,7 @@ static const add_header_func_ptr add_server_headers[] = { * file, the results are not portable. * *********************************************************************/ -long flush_socket(jb_socket fd, struct iob *iob) +long flush_iob(jb_socket fd, struct iob *iob, unsigned int delay) { long len = iob->eod - iob->cur; @@ -271,7 +275,7 @@ long flush_socket(jb_socket fd, struct iob *iob) return(0); } - if (write_socket(fd, iob->cur, (size_t)len)) + if (write_socket_delayed(fd, iob->cur, (size_t)len, delay)) { return(-1); } @@ -381,7 +385,7 @@ jb_err add_to_iob(struct iob *iob, const size_t buffer_limit, char *src, long n) void clear_iob(struct iob *iob) { free(iob->buf); - memset(iob, '\0', sizeof(*iob));; + memset(iob, '\0', sizeof(*iob)); } @@ -415,8 +419,13 @@ jb_err decompress_iob(struct client_state *csp) int status; /* return status of the inflate() call */ z_stream zstr; /* used by calls to zlib */ +#ifdef FUZZ + assert(csp->iob->cur - csp->iob->buf >= 0); + assert(csp->iob->eod - csp->iob->cur >= 0); +#else assert(csp->iob->cur - csp->iob->buf > 0); assert(csp->iob->eod - csp->iob->cur > 0); +#endif bufsize = csp->iob->size; skip_size = (size_t)(csp->iob->cur - csp->iob->buf); @@ -430,7 +439,9 @@ jb_err decompress_iob(struct client_state *csp) * This is to protect the parsing of gzipped data, * but it should(?) be valid for deflated data also. */ - log_error(LOG_LEVEL_ERROR, "Buffer too small decompressing iob"); + log_error(LOG_LEVEL_ERROR, + "Insufficient data to start decompression. 
Bytes in buffer: %d", + csp->iob->eod - csp->iob->cur); return JB_ERR_COMPRESS; } @@ -622,6 +633,7 @@ jb_err decompress_iob(struct client_state *csp) if (bufsize >= csp->config->buffer_limit) { log_error(LOG_LEVEL_ERROR, "Buffer limit reached while decompressing iob"); + freez(buf); return JB_ERR_MEMORY; } @@ -710,7 +722,7 @@ jb_err decompress_iob(struct client_state *csp) * Make sure the new uncompressed iob obeys some minimal * consistency conditions. */ - if ((csp->iob->buf < csp->iob->cur) + if ((csp->iob->buf <= csp->iob->cur) && (csp->iob->cur <= csp->iob->eod) && (csp->iob->eod <= csp->iob->buf + csp->iob->size)) { @@ -747,15 +759,12 @@ jb_err decompress_iob(struct client_state *csp) * * Function : normalize_lws * - * Description : Reduces unquoted linear white space in headers - * to a single space in accordance with RFC 2616 2.2. + * Description : Reduces unquoted linear whitespace in headers to + * a single space in accordance with RFC 7230 3.2.4. * This simplifies parsing and filtering later on. * - * XXX: Remove log messages before - * the next stable release? - * * Parameters : - * 1 : header = A header with linear white space to reduce. + * 1 : header = A header with linear whitespace to reduce. * * Returns : N/A * @@ -774,7 +783,7 @@ static void normalize_lws(char *header) { q++; } - log_error(LOG_LEVEL_HEADER, "Reducing white space in '%s'", header); + log_error(LOG_LEVEL_HEADER, "Reducing whitespace in '%s'", header); string_move(p+1, q); } @@ -1094,6 +1103,7 @@ static void enforce_header_order(struct list *headers, const struct list *ordere return; } + /********************************************************************* * * Function : sed @@ -1112,7 +1122,8 @@ static void enforce_header_order(struct list *headers, const struct list *ordere * server and header filtering. * * Returns : JB_ERR_OK in case off success, or - * JB_ERR_MEMORY on out-of-memory error. 
+ * JB_ERR_MEMORY on some out-of-memory errors, or + * JB_ERR_PARSE in case of fatal parse errors. * *********************************************************************/ jb_err sed(struct client_state *csp, int filter_server_headers) @@ -1123,22 +1134,24 @@ jb_err sed(struct client_state *csp, int filter_server_headers) const add_header_func_ptr *f; jb_err err = JB_ERR_OK; + scan_headers(csp); + if (filter_server_headers) { v = server_patterns; f = add_server_headers; + check_negative_tag_patterns(csp, PATTERN_SPEC_NO_RESPONSE_TAG_PATTERN); } else { v = client_patterns; f = add_client_headers; + check_negative_tag_patterns(csp, PATTERN_SPEC_NO_REQUEST_TAG_PATTERN); } - scan_headers(csp); - - while ((err == JB_ERR_OK) && (v->str != NULL)) + while (v->str != NULL) { - for (p = csp->headers->first; (err == JB_ERR_OK) && (p != NULL); p = p->next) + for (p = csp->headers->first; p != NULL; p = p->next) { /* Header crunch()ed in previous run? -> ignore */ if (p->str == NULL) continue; @@ -1148,6 +1161,10 @@ jb_err sed(struct client_state *csp, int filter_server_headers) (v->len == CHECK_EVERY_HEADER_REMAINING)) { err = v->parser(csp, &(p->str)); + if (err != JB_ERR_OK) + { + return err; + } } } v++; @@ -1169,6 +1186,59 @@ jb_err sed(struct client_state *csp, int filter_server_headers) } +#ifdef FEATURE_HTTPS_INSPECTION +/********************************************************************* + * + * Function : sed_https + * + * Description : add, delete or modify lines in the HTTPS client + * header streams. Wrapper around sed(). + * + * Parameters : + * 1 : csp = Current client state (buffers, headers, etc...) + * + * Returns : JB_ERR_OK in case off success, or + * JB_ERR_MEMORY on some out-of-memory errors, or + * JB_ERR_PARSE in case of fatal parse errors. 
+ * + *********************************************************************/ +jb_err sed_https(struct client_state *csp) +{ + jb_err err; + struct list headers; + + /* + * Temporarly replace csp->headers with csp->https_headers + * to trick sed() into filtering the https headers. + */ + headers.first = csp->headers->first; + headers.last = csp->headers->last; + csp->headers->first = csp->https_headers->first; + csp->headers->last = csp->https_headers->last; + + /* + * Start with fresh tags. Already existing tags may + * be set again. This is necessary to overrule + * URL-based patterns. + */ + destroy_list(csp->tags); + + /* + * We want client header filters and taggers + * so temporarly remove the flag. + */ + csp->flags &= ~CSP_FLAG_CLIENT_HEADER_PARSING_DONE; + err = sed(csp, FILTER_CLIENT_HEADERS); + csp->flags |= CSP_FLAG_CLIENT_HEADER_PARSING_DONE; + + csp->headers->first = headers.first; + csp->headers->last = headers.last; + + return err; +} +#endif /* def FEATURE_HTTPS_INSPECTION */ + + /********************************************************************* * * Function : update_server_headers @@ -1271,12 +1341,10 @@ jb_err update_server_headers(struct client_state *csp) *********************************************************************/ static jb_err header_tagger(struct client_state *csp, char *header) { - int wanted_filter_type; + enum filter_type wanted_filter_type; int multi_action_index; - int i; pcrs_job *job; - struct file_list *fl; struct re_filterfile_spec *b; struct list_entry *tag_name; @@ -1293,151 +1361,127 @@ static jb_err header_tagger(struct client_state *csp, char *header) multi_action_index = ACTION_MULTI_CLIENT_HEADER_TAGGER; } - if (filters_available(csp) == FALSE) + if (list_is_empty(csp->action->multi[multi_action_index]) + || filters_available(csp) == FALSE) { - log_error(LOG_LEVEL_ERROR, "Inconsistent configuration: " - "tagging enabled, but no taggers available."); + /* Return early if no taggers apply or if none are available. 
*/ return JB_ERR_OK; } - for (i = 0; i < MAX_AF_FILES; i++) + /* Execute all applying taggers */ + for (tag_name = csp->action->multi[multi_action_index]->first; + NULL != tag_name; tag_name = tag_name->next) { - fl = csp->rlist[i]; - if ((NULL == fl) || (NULL == fl->f)) + char *modified_tag = NULL; + char *tag = header; + size_t size = header_length; + pcrs_job *joblist; + + b = get_filter(csp, tag_name->str, wanted_filter_type); + if (b == NULL) { - /* - * Either there are no filter files - * left, or this filter file just - * contains no valid filters. - * - * Continue to be sure we don't miss - * valid filter files that are chained - * after empty or invalid ones. - */ continue; } - /* For all filters, */ - for (b = fl->f; b; b = b->next) + joblist = b->joblist; + + if (b->dynamic) joblist = compile_dynamic_pcrs_job_list(csp, b); + + if (NULL == joblist) + { + log_error(LOG_LEVEL_RE_FILTER, + "Tagger %s has empty joblist. Nothing to do.", b->name); + continue; + } + + /* execute their pcrs_joblist on the header. */ + for (job = joblist; NULL != job; job = job->next) { - if (b->type != wanted_filter_type) + const int hits = pcrs_execute(job, tag, size, &modified_tag, &size); + + if (0 < hits) { - /* skip the ones we don't care about, */ - continue; + /* Success, continue with the modified version. */ + if (tag != header) + { + freez(tag); + } + tag = modified_tag; } - /* leaving only taggers that could apply, of which we use the ones, */ - for (tag_name = csp->action->multi[multi_action_index]->first; - NULL != tag_name; tag_name = tag_name->next) + else { - /* that do apply, and */ - if (strcmp(b->name, tag_name->str) == 0) + /* Tagger doesn't match */ + if (0 > hits) { - char *modified_tag = NULL; - char *tag = header; - size_t size = header_length; - pcrs_job *joblist = b->joblist; + /* Regex failure, log it but continue anyway. 
*/ + assert(NULL != header); + log_error(LOG_LEVEL_ERROR, + "Problems with tagger \'%s\' and header \'%s\': %s", + b->name, *header, pcrs_strerror(hits)); + } + freez(modified_tag); + } + } - if (b->dynamic) joblist = compile_dynamic_pcrs_job_list(csp, b); + if (b->dynamic) pcrs_free_joblist(joblist); + + /* If this tagger matched */ + if (tag != header) + { + if (0 == size) + { + /* + * There is no technical limitation which makes + * it impossible to use empty tags, but I assume + * no one would do it intentionally. + */ + freez(tag); + log_error(LOG_LEVEL_INFO, + "Tagger \'%s\' created an empty tag. Ignored.", b->name); + continue; + } - if (NULL == joblist) + if (!list_contains_item(csp->tags, tag)) + { + if (JB_ERR_OK != enlist(csp->tags, tag)) + { + log_error(LOG_LEVEL_ERROR, + "Insufficient memory to add tag \'%s\', " + "based on tagger \'%s\' and header \'%s\'", + tag, b->name, *header); + } + else + { + char *action_message; + /* + * update the action bits right away, to make + * tagging based on tags set by earlier taggers + * of the same kind possible. + */ + if (update_action_bits_for_tag(csp, tag)) { - log_error(LOG_LEVEL_RE_FILTER, - "Tagger %s has empty joblist. Nothing to do.", b->name); - continue; + action_message = "Action bits updated accordingly."; } - - /* execute their pcrs_joblist on the header. */ - for (job = joblist; NULL != job; job = job->next) + else { - const int hits = pcrs_execute(job, tag, size, &modified_tag, &size); - - if (0 < hits) - { - /* Success, continue with the modified version. */ - if (tag != header) - { - freez(tag); - } - tag = modified_tag; - } - else - { - /* Tagger doesn't match */ - if (0 > hits) - { - /* Regex failure, log it but continue anyway. 
*/ - assert(NULL != header); - log_error(LOG_LEVEL_ERROR, - "Problems with tagger \'%s\' and header \'%s\': %s", - b->name, *header, pcrs_strerror(hits)); - } - freez(modified_tag); - } + action_message = "No action bits update necessary."; } - if (b->dynamic) pcrs_free_joblist(joblist); - - /* If this tagger matched */ - if (tag != header) - { - if (0 == size) - { - /* - * There is to technical limitation which makes - * it impossible to use empty tags, but I assume - * no one would do it intentionally. - */ - freez(tag); - log_error(LOG_LEVEL_INFO, - "Tagger \'%s\' created an empty tag. Ignored.", - b->name); - continue; - } - - if (!list_contains_item(csp->tags, tag)) - { - if (JB_ERR_OK != enlist(csp->tags, tag)) - { - log_error(LOG_LEVEL_ERROR, - "Insufficient memory to add tag \'%s\', " - "based on tagger \'%s\' and header \'%s\'", - tag, b->name, *header); - } - else - { - char *action_message; - /* - * update the action bits right away, to make - * tagging based on tags set by earlier taggers - * of the same kind possible. - */ - if (update_action_bits_for_tag(csp, tag)) - { - action_message = "Action bits updated accordingly."; - } - else - { - action_message = "No action bits update necessary."; - } - - log_error(LOG_LEVEL_HEADER, - "Tagger \'%s\' added tag \'%s\'. %s", - b->name, tag, action_message); - } - } - else - { - /* XXX: Is this log-worthy? */ - log_error(LOG_LEVEL_HEADER, - "Tagger \'%s\' didn't add tag \'%s\'. " - "Tag already present", b->name, tag); - } - freez(tag); - } /* if the tagger matched */ - } /* if the tagger applies */ - } /* for every tagger that could apply */ - } /* for all filters */ - } /* for all filter files */ + log_error(LOG_LEVEL_HEADER, + "Tagger \'%s\' added tag \'%s\'. %s", + b->name, tag, action_message); + } + } + else + { + /* XXX: Is this log-worthy? */ + log_error(LOG_LEVEL_HEADER, + "Tagger \'%s\' didn't add tag \'%s\'. 
Tag already present", + b->name, tag); + } + freez(tag); + } + } return JB_ERR_OK; } @@ -1472,12 +1516,10 @@ static jb_err filter_header(struct client_state *csp, char **header) char *newheader = NULL; pcrs_job *job; - struct file_list *fl; struct re_filterfile_spec *b; struct list_entry *filtername; - int i; - int wanted_filter_type; + enum filter_type wanted_filter_type; int multi_action_index; if (csp->flags & CSP_FLAG_NO_FILTERING) @@ -1496,97 +1538,72 @@ static jb_err filter_header(struct client_state *csp, char **header) multi_action_index = ACTION_MULTI_CLIENT_HEADER_FILTER; } - if (filters_available(csp) == FALSE) + if (list_is_empty(csp->action->multi[multi_action_index]) + || filters_available(csp) == FALSE) { - log_error(LOG_LEVEL_ERROR, "Inconsistent configuration: " - "header filtering enabled, but no matching filters available."); + /* Return early if no filters apply or if none are available. */ return JB_ERR_OK; } - for (i = 0; i < MAX_AF_FILES; i++) + /* Execute all applying header filters */ + for (filtername = csp->action->multi[multi_action_index]->first; + filtername != NULL; filtername = filtername->next) { - fl = csp->rlist[i]; - if ((NULL == fl) || (NULL == fl->f)) + int current_hits = 0; + pcrs_job *joblist; + + b = get_filter(csp, filtername->str, wanted_filter_type); + if (b == NULL) { - /* - * Either there are no filter files - * left, or this filter file just - * contains no valid filters. - * - * Continue to be sure we don't miss - * valid filter files that are chained - * after empty or invalid ones. - */ continue; } - /* - * For all applying +filter actions, look if a filter by that - * name exists and if yes, execute its pcrs_joblist on the - * buffer. 
- */ - for (b = fl->f; b; b = b->next) - { - if (b->type != wanted_filter_type) - { - /* Skip other filter types */ - continue; - } - for (filtername = csp->action->multi[multi_action_index]->first; - filtername ; filtername = filtername->next) - { - if (strcmp(b->name, filtername->str) == 0) - { - int current_hits = 0; - pcrs_job *joblist = b->joblist; + joblist = b->joblist; - if (b->dynamic) joblist = compile_dynamic_pcrs_job_list(csp, b); + if (b->dynamic) joblist = compile_dynamic_pcrs_job_list(csp, b); - if (NULL == joblist) - { - log_error(LOG_LEVEL_RE_FILTER, "Filter %s has empty joblist. Nothing to do.", b->name); - continue; - } - - log_error(LOG_LEVEL_RE_FILTER, "filtering \'%s\' (size %d) with \'%s\' ...", - *header, size, b->name); - - /* Apply all jobs from the joblist */ - for (job = joblist; NULL != job; job = job->next) - { - matches = pcrs_execute(job, *header, size, &newheader, &size); - if (0 < matches) - { - current_hits += matches; - log_error(LOG_LEVEL_HEADER, "Transforming \"%s\" to \"%s\"", *header, newheader); - freez(*header); - *header = newheader; - } - else if (0 == matches) - { - /* Filter doesn't change header */ - freez(newheader); - } - else - { - /* RegEx failure */ - log_error(LOG_LEVEL_ERROR, "Filtering \'%s\' with \'%s\' didn't work out: %s", - *header, b->name, pcrs_strerror(matches)); - if (newheader != NULL) - { - log_error(LOG_LEVEL_ERROR, "Freeing what's left: %s", newheader); - freez(newheader); - } - } - } + if (NULL == joblist) + { + log_error(LOG_LEVEL_RE_FILTER, "Filter %s has empty joblist. Nothing to do.", b->name); + continue; + } - if (b->dynamic) pcrs_free_joblist(joblist); + log_error(LOG_LEVEL_RE_FILTER, "filtering \'%s\' (size %d) with \'%s\' ...", + *header, size, b->name); - log_error(LOG_LEVEL_RE_FILTER, "... 
produced %d hits (new size %d).", current_hits, size); - hits += current_hits; + /* Apply all jobs from the joblist */ + for (job = joblist; NULL != job; job = job->next) + { + matches = pcrs_execute(job, *header, size, &newheader, &size); + if (0 < matches) + { + current_hits += matches; + log_error(LOG_LEVEL_HEADER, "Transforming \"%s\" to \"%s\"", *header, newheader); + freez(*header); + *header = newheader; + } + else if (0 == matches) + { + /* Filter doesn't change header */ + freez(newheader); + } + else + { + /* RegEx failure */ + log_error(LOG_LEVEL_ERROR, "Filtering \'%s\' with \'%s\' didn't work out: %s", + *header, b->name, pcrs_strerror(matches)); + if (newheader != NULL) + { + log_error(LOG_LEVEL_ERROR, "Freeing what's left: %s", newheader); + freez(newheader); } } } + + if (b->dynamic) pcrs_free_joblist(joblist); + + log_error(LOG_LEVEL_RE_FILTER, "... produced %d hits (new size %d).", current_hits, size); + hits += current_hits; } /* @@ -1705,6 +1722,8 @@ static jb_err server_keep_alive(struct client_state *csp, char **header) csp->flags |= CSP_FLAG_SERVER_KEEP_ALIVE_TIMEOUT_SET; } + freez(*header); + return JB_ERR_OK; } @@ -1733,6 +1752,36 @@ static jb_err server_proxy_connection(struct client_state *csp, char **header) } +/********************************************************************* + * + * Function : proxy_authentication + * + * Description : Removes headers that are relevant for proxy + * authentication unless forwarding them has + * been explicitly requested. + * + * Parameters : + * 1 : csp = Current client state (buffers, headers, etc...) + * 2 : header = On input, pointer to header to modify. + * On output, pointer to the modified header, or NULL + * to remove the header. This function frees the + * original string if necessary. + * + * Returns : JB_ERR_OK. 
+ * + *********************************************************************/ +static jb_err proxy_authentication(struct client_state *csp, char **header) +{ + if ((csp->config->feature_flags & + RUNTIME_FEATURE_FORWARD_PROXY_AUTHENTICATION_HEADERS) == 0) { + log_error(LOG_LEVEL_HEADER, + "Forwarding proxy authentication headers is disabled. Crunching: %s", *header); + freez(*header); + } + return JB_ERR_OK; +} + + /********************************************************************* * * Function : client_keep_alive @@ -1752,7 +1801,7 @@ static jb_err server_proxy_connection(struct client_state *csp, char **header) static jb_err client_keep_alive(struct client_state *csp, char **header) { unsigned int keep_alive_timeout; - const char *timeout_position = strstr(*header, ": "); + char *timeout_position; if (!(csp->config->feature_flags & RUNTIME_FEATURE_CONNECTION_KEEP_ALIVE)) { @@ -1762,29 +1811,41 @@ static jb_err client_keep_alive(struct client_state *csp, char **header) return JB_ERR_OK; } + /* Check for parameter-less format "Keep-Alive: 100" */ + timeout_position = strstr(*header, ": "); if ((NULL == timeout_position) || (1 != sscanf(timeout_position, ": %u", &keep_alive_timeout))) { - log_error(LOG_LEVEL_ERROR, "Couldn't parse: %s", *header); - } - else - { - if (keep_alive_timeout < csp->config->keep_alive_timeout) + /* Assume parameter format "Keep-Alive: timeout=100" */ + timeout_position = strstr(*header, "timeout="); + if ((NULL == timeout_position) + || (1 != sscanf(timeout_position, "timeout=%u", &keep_alive_timeout))) { log_error(LOG_LEVEL_HEADER, - "Reducing keep-alive timeout from %u to %u.", - csp->config->keep_alive_timeout, keep_alive_timeout); - csp->server_connection.keep_alive_timeout = keep_alive_timeout; - } - else - { - /* XXX: Is this log worthy? */ - log_error(LOG_LEVEL_HEADER, - "Client keep-alive timeout is %u. Sticking with %u.", - keep_alive_timeout, csp->config->keep_alive_timeout); + "Couldn't parse: '%s'. 
Using default timeout %u", + *header, csp->config->keep_alive_timeout); + freez(*header); + + return JB_ERR_OK; } } + if (keep_alive_timeout < csp->config->keep_alive_timeout) + { + log_error(LOG_LEVEL_HEADER, + "Reducing keep-alive timeout from %u to %u.", + csp->config->keep_alive_timeout, keep_alive_timeout); + csp->server_connection.keep_alive_timeout = keep_alive_timeout; + } + else + { + /* XXX: Is this log worthy? */ + log_error(LOG_LEVEL_HEADER, + "Client keep-alive timeout is %u. Sticking with %u.", + keep_alive_timeout, csp->config->keep_alive_timeout); + freez(*header); + } + return JB_ERR_OK; } @@ -1807,7 +1868,9 @@ static jb_err client_keep_alive(struct client_state *csp, char **header) static jb_err get_content_length(const char *header_value, unsigned long long *length) { #ifdef _WIN32 - assert(sizeof(unsigned long long) > 4); +#if SIZEOF_LONG_LONG < 8 +#error sizeof(unsigned long long) too small +#endif if (1 != sscanf(header_value, "%I64u", length)) #else if (1 != sscanf(header_value, "%llu", length)) @@ -2014,6 +2077,40 @@ jb_err client_transfer_encoding(struct client_state *csp, char **header) } +/********************************************************************* + * + * Function : client_expect + * + * Description : Raise the CSP_FLAG_UNSUPPORTED_CLIENT_EXPECTATION + * if the Expect header value is unsupported. + * + * Rejecting unsupported expectations is a RFC 7231 5.1.1 + * MAY and a RFC 2616 (obsolete) MUST. + * + * Parameters : + * 1 : csp = Current client state (buffers, headers, etc...) + * 2 : header = On input, pointer to header to modify. + * On output, pointer to the modified header, or NULL + * to remove the header. This function frees the + * original string if necessary. 
+ * + * Returns : JB_ERR_OK on success, or + * + *********************************************************************/ +jb_err client_expect(struct client_state *csp, char **header) +{ + if (0 != strcmpic(*header, "Expect: 100-continue")) + { + csp->flags |= CSP_FLAG_UNSUPPORTED_CLIENT_EXPECTATION; + log_error(LOG_LEVEL_HEADER, + "Unsupported client expectaction: %s", *header); + } + + return JB_ERR_OK; + +} + + /********************************************************************* * * Function : crumble @@ -2105,11 +2202,25 @@ static jb_err server_content_type(struct client_state *csp, char **header) /* Remove header if it isn't the first Content-Type header */ if ((csp->content_type & CT_DECLARED)) { - log_error(LOG_LEVEL_ERROR, - "Multiple Content-Type headers. Removing and ignoring: \'%s\'", - *header); - freez(*header); - + if (content_filters_enabled(csp->action)) + { + /* + * Making sure the client interprets the content the same way + * Privoxy did is only relevant if Privoxy modified it. + * + * Checking for this is "hard" as it's not yet known when + * this function is called, thus go shopping and and just + * check if Privoxy could filter it. + * + * The main thing is that we don't mess with the headers + * unless the user signalled that it's acceptable. + */ + log_error(LOG_LEVEL_HEADER, + "Multiple Content-Type headers detected. 
" + "Removing and ignoring: %s", + *header); + freez(*header); + } return JB_ERR_OK; } @@ -2148,11 +2259,12 @@ static jb_err server_content_type(struct client_state *csp, char **header) */ if ((csp->content_type & CT_TEXT) || (csp->action->flags & ACTION_FORCE_TEXT_MODE)) { + jb_err err; freez(*header); *header = strdup_or_die("Content-Type: "); - string_append(header, csp->action->string[ACTION_STRING_CONTENT_TYPE]); - if (header == NULL) + err = string_append(header, csp->action->string[ACTION_STRING_CONTENT_TYPE]); + if (JB_ERR_OK != err) { log_error(LOG_LEVEL_HEADER, "Insufficient memory to replace Content-Type!"); return JB_ERR_MEMORY; @@ -2274,8 +2386,7 @@ static jb_err server_content_encoding(struct client_state *csp, char **header) /* * Log a warning if the user expects the content to be filtered. */ - if ((csp->rlist != NULL) && - (!list_is_empty(csp->action->multi[ACTION_MULTI_FILTER]))) + if (content_filters_enabled(csp->action)) { log_error(LOG_LEVEL_INFO, "SDCH-compressed content detected, content filtering disabled. " @@ -2320,8 +2431,7 @@ static jb_err server_content_encoding(struct client_state *csp, char **header) /* * Log a warning if the user expects the content to be filtered. */ - if ((csp->rlist != NULL) && - (!list_is_empty(csp->action->multi[ACTION_MULTI_FILTER]))) + if (content_filters_enabled(csp->action)) { log_error(LOG_LEVEL_INFO, "Compressed content detected, content filtering disabled. 
" @@ -2619,13 +2729,12 @@ static jb_err server_last_modified(struct client_state *csp, char **header) } else if (0 == strcmpic(newval, "randomize")) { - const char *header_time = *header + sizeof("Last-Modified:"); - log_error(LOG_LEVEL_HEADER, "Randomizing: %s", *header); - if (JB_ERR_OK != parse_header_time(header_time, &last_modified)) + if (JB_ERR_OK != parse_time_header(*header, &last_modified)) { - log_error(LOG_LEVEL_HEADER, "Couldn't parse: %s in %s (crunching!)", header_time, *header); + log_error(LOG_LEVEL_HEADER, + "Couldn't parse time in %s (crunching!)", *header); freez(*header); } else @@ -3221,9 +3330,6 @@ static jb_err client_max_forwards(struct client_state *csp, char **header) * port information, parse and evaluate the Host * header field. * - * Also, kill ill-formed HOST: headers as sent by - * Apple's iTunes software when used with a proxy. - * * Parameters : * 1 : csp = Current client state (buffers, headers, etc...) * 2 : header = On input, pointer to header to modify. @@ -3239,14 +3345,9 @@ static jb_err client_host(struct client_state *csp, char **header) { char *p, *q; - /* - * If the header field name is all upper-case, chances are that it's - * an ill-formed one from iTunes. BTW, killing innocent headers here is - * not a problem -- they are regenerated later. 
- */ - if ((*header)[1] == 'O') + if (strlen(*header) < 7) { - log_error(LOG_LEVEL_HEADER, "Killed all-caps Host header line: %s", *header); + log_error(LOG_LEVEL_HEADER, "Removing empty Host header"); freez(*header); return JB_ERR_OK; } @@ -3337,11 +3438,10 @@ static jb_err client_if_modified_since(struct client_state *csp, char **header) } else /* add random value */ { - const char *header_time = *header + sizeof("If-Modified-Since:"); - - if (JB_ERR_OK != parse_header_time(header_time, &tm)) + if (JB_ERR_OK != parse_time_header(*header, &tm)) { - log_error(LOG_LEVEL_HEADER, "Couldn't parse: %s in %s (crunching!)", header_time, *header); + log_error(LOG_LEVEL_HEADER, + "Couldn't parse time in %s (crunching!)", *header); freez(*header); } else @@ -3565,9 +3665,8 @@ static jb_err client_host_adder(struct client_state *csp) if (!csp->http->hostport || !*(csp->http->hostport)) { - /* XXX: When does this happen and why is it OK? */ - log_error(LOG_LEVEL_INFO, "Weirdness in client_host_adder detected and ignored."); - return JB_ERR_OK; + log_error(LOG_LEVEL_ERROR, "Destination host unknown."); + return JB_ERR_PARSE; } /* @@ -3760,7 +3859,8 @@ static jb_err server_proxy_connection_adder(struct client_state *csp) * Function : client_connection_header_adder * * Description : Adds a proper "Connection:" header to csp->headers - * unless the header was already present. Called from `sed'. + * unless the header was already present or it's a + * CONNECT request. Called from `sed'. * * Parameters : * 1 : csp = Current client state (buffers, headers, etc...) @@ -3779,10 +3879,20 @@ static jb_err client_connection_header_adder(struct client_state *csp) return JB_ERR_OK; } + /* + * In case of CONNECT requests "Connection: close" is implied, + * but actually setting the header has been reported to cause + * problems with some forwarding proxies that close the + * connection prematurely. 
+ */ + if (csp->http->ssl != 0) + { + return JB_ERR_OK; + } + #ifdef FEATURE_CONNECTION_KEEP_ALIVE if ((csp->config->feature_flags & RUNTIME_FEATURE_CONNECTION_KEEP_ALIVE) && !(csp->flags & CSP_FLAG_SERVER_SOCKET_TAINTED) - && (csp->http->ssl == 0) && !strcmpic(csp->http->ver, "HTTP/1.1")) { csp->flags |= CSP_FLAG_CLIENT_CONNECTION_KEEP_ALIVE; @@ -3805,6 +3915,7 @@ static jb_err client_connection_header_adder(struct client_state *csp) * is a partial range (HTTP status 206) * - Rewrite HTTP/1.1 answers to HTTP/1.0 if +downgrade * action applies. + * - Normalize the HTTP-version. * * Parameters : * 1 : csp = Current client state (buffers, headers, etc...) @@ -3814,36 +3925,84 @@ static jb_err client_connection_header_adder(struct client_state *csp) * original string if necessary. * * Returns : JB_ERR_OK on success, or - * JB_ERR_MEMORY on out-of-memory error. + * JB_ERR_PARSE on fatal parse errors. * *********************************************************************/ static jb_err server_http(struct client_state *csp, char **header) { - sscanf(*header, "HTTP/%*d.%*d %d", &(csp->http->status)); + char *reason_phrase = NULL; + char *new_response_line; + char *p; + size_t length; + unsigned int major_version; + unsigned int minor_version; + + /* Get the reason phrase which start after the second whitespace */ + p = strchr(*header, ' '); + if (NULL != p) + { + p++; + reason_phrase = strchr(p, ' '); + } + + if (reason_phrase != NULL) + { + reason_phrase++; + } + else + { + log_error(LOG_LEVEL_ERROR, + "Response line lacks reason phrase: %s", *header); + reason_phrase=""; + } + + if (3 != sscanf(*header, "HTTP/%u.%u %d", &major_version, + &minor_version, &(csp->http->status))) + { + log_error(LOG_LEVEL_ERROR, + "Failed to parse the response line: %s", *header); + return JB_ERR_PARSE; + } + if (csp->http->status == 206) { csp->content_type = CT_TABOO; } - if ((csp->action->flags & ACTION_DOWNGRADE) != 0) + if (major_version != 1 || (minor_version != 0 && minor_version != 
1)) { - /* XXX: Should we do a real validity check here? */ - if (strlen(*header) > 8) - { - (*header)[7] = '0'; - log_error(LOG_LEVEL_HEADER, "Downgraded answer to HTTP/1.0"); - } - else - { - /* - * XXX: Should we block the request or - * enlist a valid status code line here? - */ - log_error(LOG_LEVEL_INFO, "Malformed server response detected. " - "Downgrading to HTTP/1.0 impossible."); - } + /* + * According to RFC 7230 2.6 intermediaries MUST send + * their own HTTP-version in forwarded messages. + */ + log_error(LOG_LEVEL_ERROR, + "Unsupported HTTP version. Downgrading to 1.1."); + major_version = 1; + minor_version = 1; + } + + if (((csp->action->flags & ACTION_DOWNGRADE) != 0) && (minor_version == 1)) + { + log_error(LOG_LEVEL_HEADER, "Downgrading answer to HTTP/1.0"); + minor_version = 0; + } + + /* Rebuild response line. */ + length = sizeof("HTTP/1.1 200 ") + strlen(reason_phrase) + 1; + new_response_line = malloc_or_die(length); + + snprintf(new_response_line, length, "HTTP/%u.%u %d %s", + major_version, minor_version, csp->http->status, reason_phrase); + + if (0 != strcmp(*header, new_response_line)) + { + log_error(LOG_LEVEL_HEADER, "Response line '%s' changed to '%s'", + *header, new_response_line); } + freez(*header); + *header = new_response_line; + return JB_ERR_OK; } @@ -4224,7 +4383,13 @@ static jb_err parse_header_time(const char *header_time, time_t *result) time_t result2; tm = gmtime(result); - strftime(recreated_date, sizeof(recreated_date), time_formats[i], tm); + if (!strftime(recreated_date, sizeof(recreated_date), + time_formats[i], tm)) + { + log_error(LOG_LEVEL_ERROR, "Failed to recreate date '%s' with '%s'.", + header_time, time_formats[i]); + continue; + } memset(&gmt, 0, sizeof(gmt)); if (NULL == strptime(recreated_date, time_formats[i], &gmt)) { @@ -4252,6 +4417,44 @@ static jb_err parse_header_time(const char *header_time, time_t *result) } +/********************************************************************* + * + * Function : 
parse_time_header + * + * Description : Parses the time in an HTTP time header to get + * the numerical representation. + * + * Parameters : + * 1 : header = HTTP header with a time value + * 2 : result = storage for header_time in seconds + * + * Returns : JB_ERR_OK if the time format was recognized, or + * JB_ERR_PARSE otherwise. + * + *********************************************************************/ +static jb_err parse_time_header(const char *header, time_t *result) +{ + const char *header_time; + + header_time = strchr(header, ':'); + + /* + * Currently this can't happen as all callers are called + * through sed() which requires a header name followed by + * a colon. + */ + assert(header_time != NULL); + + header_time++; + if (*header_time == ' ') + { + header_time++; + } + + return parse_header_time(header_time, result); + +} + /********************************************************************* * @@ -4305,12 +4508,12 @@ jb_err get_destination_from_headers(const struct list *headers, struct http_requ } else { - http->port = http->ssl ? 443 : 80; + http->port = 80; } /* Rebuild request URL */ freez(http->url); - http->url = strdup(http->ssl ? "https://" : "http://"); + http->url = strdup("http://"); string_append(&http->url, http->hostport); string_append(&http->url, http->path); if (http->url == NULL) @@ -4318,12 +4521,111 @@ jb_err get_destination_from_headers(const struct list *headers, struct http_requ return JB_ERR_MEMORY; } - log_error(LOG_LEVEL_HEADER, "Destination extracted from \"Host:\" header. New request URL: %s", + log_error(LOG_LEVEL_HEADER, + "Destination extracted from \"Host\" header. New request URL: %s", + http->url); + + /* + * Regenerate request line in "proxy format" + * to make rewrites more convenient. 
+ */ + assert(http->cmd != NULL); + freez(http->cmd); + http->cmd = strdup_or_die(http->gpc); + string_append(&http->cmd, " "); + string_append(&http->cmd, http->url); + string_append(&http->cmd, " "); + string_append(&http->cmd, http->ver); + if (http->cmd == NULL) + { + return JB_ERR_MEMORY; + } + + return JB_ERR_OK; + +} + + +#ifdef FEATURE_HTTPS_INSPECTION +/********************************************************************* + * + * Function : get_destination_from_https_headers + * + * Description : Parse the previously encrypted "Host:" header to + * get the request's destination. + * + * Parameters : + * 1 : headers = List of headers (one of them hopefully being + * the "Host:" header) + * 2 : http = storage for the result (host, port and hostport). + * + * Returns : JB_ERR_MEMORY (or terminates) in case of memory problems, + * JB_ERR_PARSE if the host header couldn't be found, + * JB_ERR_OK otherwise. + * + *********************************************************************/ +jb_err get_destination_from_https_headers(const struct list *headers, struct http_request *http) +{ + char *q; + char *p; + char *host; + + host = get_header_value(headers, "Host:"); + + if (NULL == host) + { + log_error(LOG_LEVEL_ERROR, "No \"Host:\" header found."); + return JB_ERR_PARSE; + } + + p = strdup_or_die(host); + chomp(p); + q = strdup_or_die(p); + + freez(http->hostport); + http->hostport = p; + freez(http->host); + http->host = q; + q = strchr(http->host, ':'); + if (q != NULL) + { + /* Terminate hostname and evaluate port string */ + *q++ = '\0'; + http->port = atoi(q); + } + else + { + http->port = 443; + } + + /* Rebuild request URL */ + freez(http->url); + http->url = strdup_or_die(http->path); + + log_error(LOG_LEVEL_HEADER, + "Destination extracted from \"Host\" header. New request URL: %s", http->url); + /* + * Regenerate request line in "proxy format" + * to make rewrites more convenient. 
+ */ + assert(http->cmd != NULL); + freez(http->cmd); + http->cmd = strdup_or_die(http->gpc); + string_append(&http->cmd, " "); + string_append(&http->cmd, http->url); + string_append(&http->cmd, " "); + string_append(&http->cmd, http->ver); + if (http->cmd == NULL) + { + return JB_ERR_MEMORY; + } + return JB_ERR_OK; } +#endif /* def FEATURE_HTTPS_INSPECTION */ /********************************************************************* @@ -4476,7 +4778,14 @@ static jb_err handle_conditional_hide_referrer_parameter(char **header, static void create_content_length_header(unsigned long long content_length, char *header, size_t buffer_length) { +#ifdef _WIN32 +#if SIZEOF_LONG_LONG < 8 +#error sizeof(unsigned long long) too small +#endif + snprintf(header, buffer_length, "Content-Length: %I64u", content_length); +#else snprintf(header, buffer_length, "Content-Length: %llu", content_length); +#endif }