X-Git-Url: http://www.privoxy.org/gitweb/?p=privoxy.git;a=blobdiff_plain;f=filters.c;h=60d420faf60cee6512ab81746aa6eb2477775699;hp=ed5a3736d2feafca69d9e24e5c2a8584692711e0;hb=c88024b1f6a64ecc2d923ff875e10f15b64daff3;hpb=6270660d48a8c2dfe80c98c0bd802133899ccdd7 diff --git a/filters.c b/filters.c index ed5a3736..60d420fa 100644 --- a/filters.c +++ b/filters.c @@ -1,19 +1,11 @@ -const char filters_rcs[] = "$Id: filters.c,v 1.129 2010/05/01 18:20:50 fabiankeil Exp $"; +const char filters_rcs[] = "$Id: filters.c,v 1.175 2012/10/21 12:58:03 fabiankeil Exp $"; /********************************************************************* * * File : $Source: /cvsroot/ijbswa/current/filters.c,v $ * * Purpose : Declares functions to parse/crunch headers and pages. - * Functions declared include: - * `acl_addr', `add_stats', `block_acl', `block_imageurl', - * `block_url', `url_actions', `domain_split', - * `filter_popups', `forward_url', 'redirect_url', - * `ij_untrusted_url', `intercept_url', `pcrs_filter_respose', - * `ijb_send_banner', `trust_url', `gif_deanimate_response', - * `execute_single_pcrs_command', `rewrite_url', - * `get_last_url' - * - * Copyright : Written by and Copyright (C) 2001-2010 the + * + * Copyright : Written by and Copyright (C) 2001-2011 the * Privoxy team. http://www.privoxy.org/ * * Based on the Internet Junkbuster originally written @@ -50,11 +42,6 @@ const char filters_rcs[] = "$Id: filters.c,v 1.129 2010/05/01 18:20:50 fabiankei #include #include -#ifdef HAVE_RFC2553 -#include -#include -#endif /* def HAVE_RFC2553 */ - #ifndef _WIN32 #ifndef __OS2__ #include @@ -89,15 +76,8 @@ const char filters_rcs[] = "$Id: filters.c,v 1.129 2010/05/01 18:20:50 fabiankei const char filters_h_rcs[] = FILTERS_H_VERSION; -/* Fix a problem with Solaris. There should be no effect on other - * platforms. - * Solaris's isspace() is a macro which uses it's argument directly - * as an array index. Therefore we need to make sure that high-bit - * characters generate +ve values, and ideally we also want to make - * the argument match the declared parameter type of "int". - */ -#define ijb_isdigit(__X) isdigit((int)(unsigned char)(__X)) - +typedef char *(*filter_function_ptr)(); +static filter_function_ptr get_filter_function(const struct client_state *csp); static jb_err remove_chunked_transfer_coding(char *buffer, size_t *size); static jb_err prepare_for_filtering(struct client_state *csp); @@ -196,10 +176,8 @@ static int match_sockaddr(const struct sockaddr_storage *network, if (network->ss_family != netmask->ss_family) { /* This should never happen */ - log_error(LOG_LEVEL_ERROR, - "Internal error at %s:%llu: network and netmask differ in family", - __FILE__, __LINE__); - return 0; + assert(network->ss_family == netmask->ss_family); + log_error(LOG_LEVEL_FATAL, "Network and netmask differ in family."); } sockaddr_storage_to_ip(network, &network_addr, &addr_len, &network_port); @@ -292,6 +270,10 @@ int block_acl(const struct access_control_addr *dst, const struct client_state * { return(0); } + else + { + return(1); + } } else if ( #ifdef HAVE_RFC2553 @@ -378,7 +360,7 @@ int acl_addr(const char *aspec, struct access_control_addr *aca) if ((p = strchr(acl_spec, '/')) != NULL) { *p++ = '\0'; - if (ijb_isdigit(*p) == 0) + if (privoxy_isdigit(*p) == 0) { freez(acl_spec); return(-1); @@ -484,7 +466,7 @@ int acl_addr(const char *aspec, struct access_control_addr *aca) * of octets (128-bit CPU could do it in one iteration). */ /* - * Octets after prefix can be ommitted because of + * Octets after prefix can be omitted because of * previous initialization to zeros. */ for (i = 0; (i < addr_len) && masklength; i++) @@ -573,7 +555,7 @@ struct http_response *block_url(struct client_state *csp) } if (csp->action->flags & ACTION_REDIRECT) { - log_error(LOG_LEVEL_ERROR, "redirect{} overruled by block."); + log_error(LOG_LEVEL_ERROR, "redirect{} overruled by block."); } /* * Else, prepare a response @@ -595,7 +577,7 @@ struct http_response *block_url(struct client_state *csp) /* determine HOW images should be blocked */ p = csp->action->string[ACTION_STRING_IMAGE_BLOCKER]; - if(csp->action->flags & ACTION_HANDLE_AS_EMPTY_DOCUMENT) + if (csp->action->flags & ACTION_HANDLE_AS_EMPTY_DOCUMENT) { log_error(LOG_LEVEL_ERROR, "handle-as-empty-document overruled by handle-as-image."); } @@ -664,10 +646,10 @@ struct http_response *block_url(struct client_state *csp) } else #endif /* def FEATURE_IMAGE_BLOCKING */ - if(csp->action->flags & ACTION_HANDLE_AS_EMPTY_DOCUMENT) + if (csp->action->flags & ACTION_HANDLE_AS_EMPTY_DOCUMENT) { /* - * Send empty document. + * Send empty document. */ new_content_type = csp->action->string[ACTION_STRING_CONTENT_TYPE]; @@ -713,27 +695,8 @@ struct http_response *block_url(struct client_state *csp) { jb_err err; struct map * exports; - char *p; - - /* - * Workaround for stupid Netscape bug which prevents - * pages from being displayed if loading a referenced - * JavaScript or style sheet fails. So make it appear - * as if it succeeded. - */ - if ( NULL != (p = get_header_value(csp->headers, "User-Agent:")) - && !strncmpic(p, "mozilla", 7) /* Catch Netscape but */ - && !strstr(p, "Gecko") /* save Mozilla, */ - && !strstr(p, "compatible") /* MSIE */ - && !strstr(p, "Opera")) /* and Opera. */ - { - rsp->status = strdup("200 Request for blocked URL"); - } - else - { - rsp->status = strdup("403 Request for blocked URL"); - } + rsp->status = strdup("403 Request blocked by Privoxy"); if (rsp->status == NULL) { free_http_response(rsp); @@ -854,7 +817,7 @@ struct http_response *trust_url(struct client_state *csp) * Export the protocol, host, port, and referrer information */ err = map(exports, "hostport", 1, csp->http->hostport, 1); - if (!err) err = map(exports, "protocol", 1, csp->http->ssl ? "https://" : "http://", 1); + if (!err) err = map(exports, "protocol", 1, csp->http->ssl ? "https://" : "http://", 1); if (!err) err = map(exports, "path", 1, csp->http->path, 1); if (NULL != (p = get_header_value(csp->headers, "Referer:"))) @@ -974,7 +937,7 @@ struct http_response *trust_url(struct client_state *csp) * 2 : b = The filter list to compile * * Returns : NULL in case of errors, otherwise the - * pcrs job list. + * pcrs job list. * *********************************************************************/ pcrs_job *compile_dynamic_pcrs_job_list(const struct client_state *csp, const struct re_filterfile_spec *b) @@ -1001,10 +964,9 @@ pcrs_job *compile_dynamic_pcrs_job_list(const struct client_state *csp, const st dummy = pcrs_compile_dynamic_command(pattern->str, variables, &error); if (NULL == dummy) { - assert(error < 0); log_error(LOG_LEVEL_ERROR, - "Adding filter job \'%s\' to dynamic filter %s failed: %s", - pattern->str, b->name, pcrs_strerror(error)); + "Compiling dynamic pcrs job '%s' for '%s' failed with error code %d: %s", + pattern->str, b->name, error, pcrs_strerror(error)); continue; } else @@ -1044,7 +1006,7 @@ pcrs_job *compile_dynamic_pcrs_job_list(const struct client_state *csp, const st * 2 : pcrs_command = pcrs command formatted as string (s@foo@bar@) * * - * Returns : NULL if the pcrs_command didn't change the url, or + * Returns : NULL if the pcrs_command didn't change the url, or * the result of the modification. * *********************************************************************/ @@ -1103,7 +1065,7 @@ char *rewrite_url(char *old_url, const char *pcrs_command) * * Parameters : * 1 : subject = the string to check - * 2 : redirect_mode = +fast-redirect{} mode + * 2 : redirect_mode = +fast-redirect{} mode * * Returns : NULL if no URL was found, or * the last URL found. @@ -1124,22 +1086,81 @@ char *get_last_url(char *subject, const char *redirect_mode) return NULL; } - if (0 == strcmpic(redirect_mode, "check-decoded-url")) + if (0 == strcmpic(redirect_mode, "check-decoded-url") && strchr(subject, '%')) { - log_error(LOG_LEVEL_REDIRECTS, "Decoding \"%s\" if necessary.", subject); - new_url = url_decode(subject); - if (new_url != NULL) + char *url_segment = NULL; + char **url_segments; + size_t max_segments; + int segments; + + log_error(LOG_LEVEL_REDIRECTS, + "Checking \"%s\" for encoded redirects.", subject); + + /* + * Check each parameter in the URL separately. + * Sectionize the URL at "?" and "&", + * go backwards through the segments, URL-decode them + * and look for a URL in the decoded result. + * Stop the search after the first match. + * + * XXX: This estimate is guaranteed to be high enough as we + * let ssplit() ignore empty fields, but also a bit wasteful. + */ + max_segments = strlen(subject) / 2; + url_segments = malloc(max_segments * sizeof(char *)); + + if (NULL == url_segments) { + log_error(LOG_LEVEL_ERROR, + "Out of memory while decoding URL: %s", subject); freez(subject); - subject = new_url; + return NULL; } - else + + segments = ssplit(subject, "?&", url_segments, max_segments); + + while (segments-- > 0) { - log_error(LOG_LEVEL_ERROR, "Unable to decode \"%s\".", subject); + char *dtoken = url_decode(url_segments[segments]); + if (NULL == dtoken) + { + log_error(LOG_LEVEL_ERROR, "Unable to decode \"%s\".", url_segments[segments]); + continue; + } + url_segment = strstr(dtoken, "http://"); + if (NULL == url_segment) + { + url_segment = strstr(dtoken, "https://"); + } + if (NULL != url_segment) + { + url_segment = strdup(url_segment); + freez(dtoken); + if (url_segment == NULL) + { + log_error(LOG_LEVEL_ERROR, + "Out of memory while searching for redirects."); + return NULL; + } + break; + } + freez(dtoken); } - } + freez(subject); + freez(url_segments); - log_error(LOG_LEVEL_REDIRECTS, "Checking \"%s\" for redirects.", subject); + if (url_segment == NULL) + { + return NULL; + } + subject = url_segment; + } + else + { + /* Look for a URL inside this one, without decoding anything. */ + log_error(LOG_LEVEL_REDIRECTS, + "Checking \"%s\" for unencoded redirects.", subject); + } /* * Find the last URL encoded in the request @@ -1162,7 +1183,7 @@ char *get_last_url(char *subject, const char *redirect_mode) )) { /* - * Return new URL if we found a redirect + * Return new URL if we found a redirect * or if the subject already was a URL. * * The second case makes sure that we can @@ -1268,9 +1289,24 @@ struct http_response *redirect_url(struct client_state *csp) #endif /* def FEATURE_FAST_REDIRECTS */ csp->action->flags &= ~ACTION_REDIRECT; - /* Did any redirect action trigger? */ + /* Did any redirect action trigger? */ if (new_url) { + if (url_requires_percent_encoding(new_url)) + { + char *encoded_url; + log_error(LOG_LEVEL_REDIRECTS, "Percent-encoding redirect URL: %N", + strlen(new_url), new_url); + encoded_url = percent_encode_url(new_url); + freez(new_url); + if (encoded_url == NULL) + { + return cgi_error_memory(); + } + new_url = encoded_url; + assert(FALSE == url_requires_percent_encoding(new_url)); + } + if (0 == strcmpic(new_url, csp->http->url)) { log_error(LOG_LEVEL_ERROR, @@ -1288,8 +1324,8 @@ struct http_response *redirect_url(struct client_state *csp) return cgi_error_memory(); } - if ( enlist_unique_header(rsp->headers, "Location", new_url) - || (NULL == (rsp->status = strdup("302 Local Redirect from Privoxy"))) ) + if (enlist_unique_header(rsp->headers, "Location", new_url) + || (NULL == (rsp->status = strdup("302 Local Redirect from Privoxy")))) { freez(new_url); free_http_response(rsp); @@ -1522,7 +1558,7 @@ static char *pcrs_filter_response(struct client_state *csp) struct re_filterfile_spec *b; struct list_entry *filtername; - /* + /* * Sanity first */ if (csp->iob->cur >= csp->iob->eod) @@ -1656,7 +1692,7 @@ static char *pcrs_filter_response(struct client_state *csp) csp->flags |= CSP_FLAG_MODIFIED; csp->content_length = size; - IOB_RESET(csp); + clear_iob(csp->iob); return(new); @@ -1738,48 +1774,10 @@ static char *gif_deanimate_response(struct client_state *csp) * NULL if no content filter is active * *********************************************************************/ -filter_function_ptr get_filter_function(struct client_state *csp) +static filter_function_ptr get_filter_function(const struct client_state *csp) { filter_function_ptr filter_function = NULL; - if ((csp->content_type & CT_TABOO) - && !(csp->action->flags & ACTION_FORCE_TEXT_MODE)) - { - return NULL; - } - - /* - * Are we enabling text mode by force? - */ - if (csp->action->flags & ACTION_FORCE_TEXT_MODE) - { - /* - * Do we really have to? - */ - if (csp->content_type & CT_TEXT) - { - log_error(LOG_LEVEL_HEADER, "Text mode is already enabled."); - } - else - { - csp->content_type |= CT_TEXT; - log_error(LOG_LEVEL_HEADER, "Text mode enabled by force. Take cover!"); - } - } - - if (!(csp->content_type & CT_DECLARED)) - { - /* - * The server didn't bother to declare a MIME-Type. - * Assume it's text that can be filtered. - * - * This also regulary happens with 304 responses, - * therefore logging anything here would cause - * too much noise. - */ - csp->content_type |= CT_TEXT; - } - /* * Choose the applying filter function based on * the content type and action settings. @@ -1840,13 +1838,15 @@ static jb_err remove_chunked_transfer_coding(char *buffer, size_t *size) return JB_ERR_PARSE; } - if ((newsize += chunksize) >= *size) + if (chunksize >= *size - newsize) { log_error(LOG_LEVEL_ERROR, - "Chunk size %d exceeds buffer size %d in \"chunked\" transfer coding", - chunksize, *size); + "Chunk size %u exceeds buffered data left. " + "Already digested %u of %u buffered bytes.", + chunksize, (unsigned int)newsize, (unsigned int)*size); return JB_ERR_PARSE; } + newsize += chunksize; from_p += 2; memmove(to_p, from_p, (size_t) chunksize); @@ -1859,7 +1859,7 @@ static jb_err remove_chunked_transfer_coding(char *buffer, size_t *size) break; } } - + /* XXX: Should get its own loglevel. */ log_error(LOG_LEVEL_RE_FILTER, "De-chunking successful. Shrunk from %d to %d", *size, newsize); @@ -1947,20 +1947,23 @@ static jb_err prepare_for_filtering(struct client_state *csp) /********************************************************************* * - * Function : execute_content_filter + * Function : execute_content_filters * * Description : Executes a given content filter. * * Parameters : * 1 : csp = Current client state (buffers, headers, etc...) - * 2 : content_filter = The filter function to execute * * Returns : Pointer to the modified buffer, or * NULL if filtering failed or wasn't necessary. * *********************************************************************/ -char *execute_content_filter(struct client_state *csp, filter_function_ptr content_filter) +char *execute_content_filters(struct client_state *csp) { + filter_function_ptr content_filter; + + assert(content_filters_enabled(csp->action)); + if (0 == csp->iob->eod - csp->iob->cur) { /* @@ -1986,6 +1989,8 @@ char *execute_content_filter(struct client_state *csp, filter_function_ptr conte return NULL; } + content_filter = get_filter_function(csp); + return ((*content_filter)(csp)); } @@ -2125,7 +2130,7 @@ const static struct forward_spec *get_forward_override_settings(struct client_st return NULL; } - vec_count = ssplit(forward_settings, " \t", vec, SZ(vec), 1, 1); + vec_count = ssplit(forward_settings, " \t", vec, SZ(vec)); if ((vec_count == 2) && !strcasecmp(vec[0], "forward")) { fwd->type = SOCKS_NONE; @@ -2153,6 +2158,11 @@ const static struct forward_spec *get_forward_override_settings(struct client_st fwd->type = SOCKS_5; socks_proxy = vec[1]; } + else if (!strcasecmp(vec[0], "forward-socks5t")) + { + fwd->type = SOCKS_5T; + socks_proxy = vec[1]; + } if (NULL != socks_proxy) { @@ -2233,7 +2243,7 @@ const struct forward_spec *forward_url(struct client_state *csp, /********************************************************************* * - * Function : direct_response + * Function : direct_response * * Description : Check if Max-Forwards == 0 for an OPTIONS or TRACE * request and if so, return a HTTP 501 to the client. @@ -2242,7 +2252,7 @@ const struct forward_spec *forward_url(struct client_state *csp, * requests properly. Still, what we do here is rfc- * compliant, whereas ignoring or forwarding are not. * - * Parameters : + * Parameters : * 1 : csp = Current client state (buffers, headers, etc...) * * Returns : http_response if , NULL if nonmatch or handler fail @@ -2258,7 +2268,7 @@ struct http_response *direct_response(struct client_state *csp) { for (p = csp->headers->first; (p != NULL) ; p = p->next) { - if (!strncmpic("Max-Forwards:", p->str, 13)) + if (!strncmpic(p->str, "Max-Forwards:", 13)) { unsigned int max_forwards; @@ -2282,7 +2292,7 @@ struct http_response *direct_response(struct client_state *csp) { return cgi_error_memory(); } - + if (NULL == (rsp->status = strdup("501 Not Implemented"))) { free_http_response(rsp); @@ -2301,6 +2311,81 @@ struct http_response *direct_response(struct client_state *csp) } +/********************************************************************* + * + * Function : content_requires_filtering + * + * Description : Checks whether there are any content filters + * enabled for the current request and if they + * can actually be applied.. + * + * Parameters : + * 1 : csp = Current client state (buffers, headers, etc...) + * + * Returns : TRUE for yes, FALSE otherwise + * + *********************************************************************/ +int content_requires_filtering(struct client_state *csp) +{ + if ((csp->content_type & CT_TABOO) + && !(csp->action->flags & ACTION_FORCE_TEXT_MODE)) + { + return FALSE; + } + + /* + * Are we enabling text mode by force? + */ + if (csp->action->flags & ACTION_FORCE_TEXT_MODE) + { + /* + * Do we really have to? + */ + if (csp->content_type & CT_TEXT) + { + log_error(LOG_LEVEL_HEADER, "Text mode is already enabled."); + } + else + { + csp->content_type |= CT_TEXT; + log_error(LOG_LEVEL_HEADER, "Text mode enabled by force. Take cover!"); + } + } + + if (!(csp->content_type & CT_DECLARED)) + { + /* + * The server didn't bother to declare a MIME-Type. + * Assume it's text that can be filtered. + * + * This also regulary happens with 304 responses, + * therefore logging anything here would cause + * too much noise. + */ + csp->content_type |= CT_TEXT; + } + + /* + * Choose the applying filter function based on + * the content type and action settings. + */ + if ((csp->content_type & CT_TEXT) && + (csp->rlist != NULL) && + (!list_is_empty(csp->action->multi[ACTION_MULTI_FILTER]))) + { + return TRUE; + } + else if ((csp->content_type & CT_GIF) && + (csp->action->flags & ACTION_DEANIMATE)) + { + return TRUE; + } + + return FALSE; + +} + + /********************************************************************* * * Function : content_filters_enabled @@ -2308,7 +2393,7 @@ struct http_response *direct_response(struct client_state *csp) * Description : Checks whether there are any content filters * enabled for the current request. * - * Parameters : + * Parameters : * 1 : action = Action spec to check. * * Returns : TRUE for yes, FALSE otherwise