X-Git-Url: http://www.privoxy.org/gitweb/?p=privoxy.git;a=blobdiff_plain;f=filters.c;h=60d420faf60cee6512ab81746aa6eb2477775699;hp=17918e94d6de51d1785e905f7a2f5b7c04e31019;hb=d880f1bc9f9c75a7d33e16b5fba191af96769248;hpb=1b2cc4af84a3bc3ba01ba913ca30012c9efa31d8 diff --git a/filters.c b/filters.c index 17918e94..60d420fa 100644 --- a/filters.c +++ b/filters.c @@ -1,19 +1,11 @@ -const char filters_rcs[] = "$Id: filters.c,v 1.141 2011/03/08 18:32:11 fabiankeil Exp $"; +const char filters_rcs[] = "$Id: filters.c,v 1.175 2012/10/21 12:58:03 fabiankeil Exp $"; /********************************************************************* * * File : $Source: /cvsroot/ijbswa/current/filters.c,v $ * * Purpose : Declares functions to parse/crunch headers and pages. - * Functions declared include: - * `acl_addr', `add_stats', `block_acl', `block_imageurl', - * `block_url', `url_actions', `domain_split', - * `filter_popups', `forward_url', 'redirect_url', - * `ij_untrusted_url', `intercept_url', `pcrs_filter_respose', - * `ijb_send_banner', `trust_url', `gif_deanimate_response', - * `execute_single_pcrs_command', `rewrite_url', - * `get_last_url' - * - * Copyright : Written by and Copyright (C) 2001-2010 the + * + * Copyright : Written by and Copyright (C) 2001-2011 the * Privoxy team. http://www.privoxy.org/ * * Based on the Internet Junkbuster originally written @@ -84,15 +76,6 @@ const char filters_rcs[] = "$Id: filters.c,v 1.141 2011/03/08 18:32:11 fabiankei const char filters_h_rcs[] = FILTERS_H_VERSION; -/* Fix a problem with Solaris. There should be no effect on other - * platforms. - * Solaris's isspace() is a macro which uses it's argument directly - * as an array index. Therefore we need to make sure that high-bit - * characters generate +ve values, and ideally we also want to make - * the argument match the declared parameter type of "int". - */ -#define ijb_isdigit(__X) isdigit((int)(unsigned char)(__X)) - typedef char *(*filter_function_ptr)(); static filter_function_ptr get_filter_function(const struct client_state *csp); static jb_err remove_chunked_transfer_coding(char *buffer, size_t *size); @@ -193,10 +176,8 @@ static int match_sockaddr(const struct sockaddr_storage *network, if (network->ss_family != netmask->ss_family) { /* This should never happen */ - log_error(LOG_LEVEL_ERROR, - "Internal error at %s:%llu: network and netmask differ in family", - __FILE__, __LINE__); - return 0; + assert(network->ss_family == netmask->ss_family); + log_error(LOG_LEVEL_FATAL, "Network and netmask differ in family."); } sockaddr_storage_to_ip(network, &network_addr, &addr_len, &network_port); @@ -379,7 +360,7 @@ int acl_addr(const char *aspec, struct access_control_addr *aca) if ((p = strchr(acl_spec, '/')) != NULL) { *p++ = '\0'; - if (ijb_isdigit(*p) == 0) + if (privoxy_isdigit(*p) == 0) { freez(acl_spec); return(-1); @@ -574,7 +555,7 @@ struct http_response *block_url(struct client_state *csp) } if (csp->action->flags & ACTION_REDIRECT) { - log_error(LOG_LEVEL_ERROR, "redirect{} overruled by block."); + log_error(LOG_LEVEL_ERROR, "redirect{} overruled by block."); } /* * Else, prepare a response @@ -596,7 +577,7 @@ struct http_response *block_url(struct client_state *csp) /* determine HOW images should be blocked */ p = csp->action->string[ACTION_STRING_IMAGE_BLOCKER]; - if(csp->action->flags & ACTION_HANDLE_AS_EMPTY_DOCUMENT) + if (csp->action->flags & ACTION_HANDLE_AS_EMPTY_DOCUMENT) { log_error(LOG_LEVEL_ERROR, "handle-as-empty-document overruled by handle-as-image."); } @@ -665,10 +646,10 @@ struct http_response *block_url(struct client_state *csp) } else #endif /* def FEATURE_IMAGE_BLOCKING */ - if(csp->action->flags & ACTION_HANDLE_AS_EMPTY_DOCUMENT) + if (csp->action->flags & ACTION_HANDLE_AS_EMPTY_DOCUMENT) { /* - * Send empty document. + * Send empty document. */ new_content_type = csp->action->string[ACTION_STRING_CONTENT_TYPE]; @@ -714,27 +695,8 @@ struct http_response *block_url(struct client_state *csp) { jb_err err; struct map * exports; - char *p; - - /* - * Workaround for stupid Netscape bug which prevents - * pages from being displayed if loading a referenced - * JavaScript or style sheet fails. So make it appear - * as if it succeeded. - */ - if ( NULL != (p = get_header_value(csp->headers, "User-Agent:")) - && !strncmpic(p, "mozilla", 7) /* Catch Netscape but */ - && !strstr(p, "Gecko") /* save Mozilla, */ - && !strstr(p, "compatible") /* MSIE */ - && !strstr(p, "Opera")) /* and Opera. */ - { - rsp->status = strdup("200 Request blocked by Privoxy"); - } - else - { - rsp->status = strdup("403 Request blocked by Privoxy"); - } + rsp->status = strdup("403 Request blocked by Privoxy"); if (rsp->status == NULL) { free_http_response(rsp); @@ -855,7 +817,7 @@ struct http_response *trust_url(struct client_state *csp) * Export the protocol, host, port, and referrer information */ err = map(exports, "hostport", 1, csp->http->hostport, 1); - if (!err) err = map(exports, "protocol", 1, csp->http->ssl ? "https://" : "http://", 1); + if (!err) err = map(exports, "protocol", 1, csp->http->ssl ? "https://" : "http://", 1); if (!err) err = map(exports, "path", 1, csp->http->path, 1); if (NULL != (p = get_header_value(csp->headers, "Referer:"))) @@ -975,7 +937,7 @@ struct http_response *trust_url(struct client_state *csp) * 2 : b = The filter list to compile * * Returns : NULL in case of errors, otherwise the - * pcrs job list. + * pcrs job list. * *********************************************************************/ pcrs_job *compile_dynamic_pcrs_job_list(const struct client_state *csp, const struct re_filterfile_spec *b) @@ -1044,7 +1006,7 @@ pcrs_job *compile_dynamic_pcrs_job_list(const struct client_state *csp, const st * 2 : pcrs_command = pcrs command formatted as string (s@foo@bar@) * * - * Returns : NULL if the pcrs_command didn't change the url, or + * Returns : NULL if the pcrs_command didn't change the url, or * the result of the modification. * *********************************************************************/ @@ -1103,7 +1065,7 @@ char *rewrite_url(char *old_url, const char *pcrs_command) * * Parameters : * 1 : subject = the string to check - * 2 : redirect_mode = +fast-redirect{} mode + * 2 : redirect_mode = +fast-redirect{} mode * * Returns : NULL if no URL was found, or * the last URL found. @@ -1124,22 +1086,81 @@ char *get_last_url(char *subject, const char *redirect_mode) return NULL; } - if (0 == strcmpic(redirect_mode, "check-decoded-url")) + if (0 == strcmpic(redirect_mode, "check-decoded-url") && strchr(subject, '%')) { - log_error(LOG_LEVEL_REDIRECTS, "Decoding \"%s\" if necessary.", subject); - new_url = url_decode(subject); - if (new_url != NULL) + char *url_segment = NULL; + char **url_segments; + size_t max_segments; + int segments; + + log_error(LOG_LEVEL_REDIRECTS, + "Checking \"%s\" for encoded redirects.", subject); + + /* + * Check each parameter in the URL separately. + * Sectionize the URL at "?" and "&", + * go backwards through the segments, URL-decode them + * and look for a URL in the decoded result. + * Stop the search after the first match. + * + * XXX: This estimate is guaranteed to be high enough as we + * let ssplit() ignore empty fields, but also a bit wasteful. + */ + max_segments = strlen(subject) / 2; + url_segments = malloc(max_segments * sizeof(char *)); + + if (NULL == url_segments) { + log_error(LOG_LEVEL_ERROR, + "Out of memory while decoding URL: %s", subject); freez(subject); - subject = new_url; + return NULL; } - else + + segments = ssplit(subject, "?&", url_segments, max_segments); + + while (segments-- > 0) + { + char *dtoken = url_decode(url_segments[segments]); + if (NULL == dtoken) + { + log_error(LOG_LEVEL_ERROR, "Unable to decode \"%s\".", url_segments[segments]); + continue; + } + url_segment = strstr(dtoken, "http://"); + if (NULL == url_segment) + { + url_segment = strstr(dtoken, "https://"); + } + if (NULL != url_segment) + { + url_segment = strdup(url_segment); + freez(dtoken); + if (url_segment == NULL) + { + log_error(LOG_LEVEL_ERROR, + "Out of memory while searching for redirects."); + return NULL; + } + break; + } + freez(dtoken); + } + freez(subject); + freez(url_segments); + + if (url_segment == NULL) { - log_error(LOG_LEVEL_ERROR, "Unable to decode \"%s\".", subject); + return NULL; } + subject = url_segment; + } + else + { + /* Look for a URL inside this one, without decoding anything. */ + log_error(LOG_LEVEL_REDIRECTS, + "Checking \"%s\" for unencoded redirects.", subject); } - - log_error(LOG_LEVEL_REDIRECTS, "Checking \"%s\" for redirects.", subject); /* * Find the last URL encoded in the request @@ -1162,7 +1183,7 @@ char *get_last_url(char *subject, const char *redirect_mode) )) { /* - * Return new URL if we found a redirect + * Return new URL if we found a redirect * or if the subject already was a URL. * * The second case makes sure that we can @@ -1268,9 +1289,24 @@ struct http_response *redirect_url(struct client_state *csp) #endif /* def FEATURE_FAST_REDIRECTS */ csp->action->flags &= ~ACTION_REDIRECT; - /* Did any redirect action trigger? */ + /* Did any redirect action trigger? */ if (new_url) { + if (url_requires_percent_encoding(new_url)) + { + char *encoded_url; + log_error(LOG_LEVEL_REDIRECTS, "Percent-encoding redirect URL: %N", + strlen(new_url), new_url); + encoded_url = percent_encode_url(new_url); + freez(new_url); + if (encoded_url == NULL) + { + return cgi_error_memory(); + } + new_url = encoded_url; + assert(FALSE == url_requires_percent_encoding(new_url)); + } + if (0 == strcmpic(new_url, csp->http->url)) { log_error(LOG_LEVEL_ERROR, @@ -1288,8 +1324,8 @@ struct http_response *redirect_url(struct client_state *csp) return cgi_error_memory(); } - if ( enlist_unique_header(rsp->headers, "Location", new_url) - || (NULL == (rsp->status = strdup("302 Local Redirect from Privoxy"))) ) + if (enlist_unique_header(rsp->headers, "Location", new_url) + || (NULL == (rsp->status = strdup("302 Local Redirect from Privoxy")))) { freez(new_url); free_http_response(rsp); @@ -1522,7 +1558,7 @@ static char *pcrs_filter_response(struct client_state *csp) struct re_filterfile_spec *b; struct list_entry *filtername; - /* + /* * Sanity first */ if (csp->iob->cur >= csp->iob->eod) @@ -1656,7 +1692,7 @@ static char *pcrs_filter_response(struct client_state *csp) csp->flags |= CSP_FLAG_MODIFIED; csp->content_length = size; - IOB_RESET(csp); + clear_iob(csp->iob); return(new); @@ -1802,19 +1838,15 @@ static jb_err remove_chunked_transfer_coding(char *buffer, size_t *size) return JB_ERR_PARSE; } - if ((newsize += chunksize) >= *size) + if (chunksize >= *size - newsize) { - /* - * XXX: The message is a bit confusing. Isn't the real problem that - * the specified chunk size is greater than the number of bytes - * left in the buffer? This probably means the connection got - * closed prematurely. To be investigated after 3.0.17 is out. - */ log_error(LOG_LEVEL_ERROR, - "Chunk size %d exceeds buffer size %d in \"chunked\" transfer coding", - chunksize, *size); + "Chunk size %u exceeds buffered data left. " + "Already digested %u of %u buffered bytes.", + chunksize, (unsigned int)newsize, (unsigned int)*size); return JB_ERR_PARSE; } + newsize += chunksize; from_p += 2; memmove(to_p, from_p, (size_t) chunksize); @@ -1827,7 +1859,7 @@ static jb_err remove_chunked_transfer_coding(char *buffer, size_t *size) break; } } - + /* XXX: Should get its own loglevel. */ log_error(LOG_LEVEL_RE_FILTER, "De-chunking successful. Shrunk from %d to %d", *size, newsize); @@ -2098,7 +2130,7 @@ const static struct forward_spec *get_forward_override_settings(struct client_st return NULL; } - vec_count = ssplit(forward_settings, " \t", vec, SZ(vec), 1, 1); + vec_count = ssplit(forward_settings, " \t", vec, SZ(vec)); if ((vec_count == 2) && !strcasecmp(vec[0], "forward")) { fwd->type = SOCKS_NONE; @@ -2126,6 +2158,11 @@ const static struct forward_spec *get_forward_override_settings(struct client_st fwd->type = SOCKS_5; socks_proxy = vec[1]; } + else if (!strcasecmp(vec[0], "forward-socks5t")) + { + fwd->type = SOCKS_5T; + socks_proxy = vec[1]; + } if (NULL != socks_proxy) { @@ -2206,7 +2243,7 @@ const struct forward_spec *forward_url(struct client_state *csp, /********************************************************************* * - * Function : direct_response + * Function : direct_response * * Description : Check if Max-Forwards == 0 for an OPTIONS or TRACE * request and if so, return a HTTP 501 to the client. @@ -2215,7 +2252,7 @@ const struct forward_spec *forward_url(struct client_state *csp, * requests properly. Still, what we do here is rfc- * compliant, whereas ignoring or forwarding are not. * - * Parameters : + * Parameters : * 1 : csp = Current client state (buffers, headers, etc...) * * Returns : http_response if , NULL if nonmatch or handler fail @@ -2231,7 +2268,7 @@ struct http_response *direct_response(struct client_state *csp) { for (p = csp->headers->first; (p != NULL) ; p = p->next) { - if (!strncmpic("Max-Forwards:", p->str, 13)) + if (!strncmpic(p->str, "Max-Forwards:", 13)) { unsigned int max_forwards; @@ -2255,7 +2292,7 @@ struct http_response *direct_response(struct client_state *csp) { return cgi_error_memory(); } - + if (NULL == (rsp->status = strdup("501 Not Implemented"))) { free_http_response(rsp); @@ -2356,7 +2393,7 @@ int content_requires_filtering(struct client_state *csp) * Description : Checks whether there are any content filters * enabled for the current request. * - * Parameters : + * Parameters : * 1 : action = Action spec to check. * * Returns : TRUE for yes, FALSE otherwise