X-Git-Url: http://www.privoxy.org/gitweb/?a=blobdiff_plain;f=filters.c;h=e2f490c7d274499026920b0f09878154921f4d78;hb=8c3e4f19001300c07cb076353eba625e4729fdd2;hp=f3b31bb5eaf0e93b42d1be904077209a78777285;hpb=df060e5bb4913aa5130f01302d3228a7171a2919;p=privoxy.git diff --git a/filters.c b/filters.c index f3b31bb5..e2f490c7 100644 --- a/filters.c +++ b/filters.c @@ -1,4 +1,4 @@ -const char filters_rcs[] = "$Id: filters.c,v 1.128 2010/04/03 13:23:28 fabiankeil Exp $"; +const char filters_rcs[] = "$Id: filters.c,v 1.154 2011/10/30 16:22:46 fabiankeil Exp $"; /********************************************************************* * * File : $Source: /cvsroot/ijbswa/current/filters.c,v $ @@ -50,11 +50,6 @@ const char filters_rcs[] = "$Id: filters.c,v 1.128 2010/04/03 13:23:28 fabiankei #include #include -#ifdef HAVE_RFC2553 -#include -#include -#endif /* def HAVE_RFC2553 */ - #ifndef _WIN32 #ifndef __OS2__ #include @@ -83,6 +78,11 @@ const char filters_rcs[] = "$Id: filters.c,v 1.128 2010/04/03 13:23:28 fabiankei #include "urlmatch.h" #include "loaders.h" +#ifdef HAVE_STRTOK +/* Only used for locks */ +#include "jcc.h" +#endif /* def HAVE_STRTOK */ + #ifdef _WIN32 #include "win32.h" #endif @@ -98,6 +98,8 @@ const char filters_h_rcs[] = FILTERS_H_VERSION; */ #define ijb_isdigit(__X) isdigit((int)(unsigned char)(__X)) +typedef char *(*filter_function_ptr)(); +static filter_function_ptr get_filter_function(const struct client_state *csp); static jb_err remove_chunked_transfer_coding(char *buffer, size_t *size); static jb_err prepare_for_filtering(struct client_state *csp); @@ -292,6 +294,10 @@ int block_acl(const struct access_control_addr *dst, const struct client_state * { return(0); } + else + { + return(1); + } } else if ( #ifdef HAVE_RFC2553 @@ -484,7 +490,7 @@ int acl_addr(const char *aspec, struct access_control_addr *aca) * of octets (128-bit CPU could do it in one iteration). */ /* - * Octets after prefix can be ommitted because of + * Octets after prefix can be omitted because of * previous initialization to zeros. */ for (i = 0; (i < addr_len) && masklength; i++) @@ -573,7 +579,7 @@ struct http_response *block_url(struct client_state *csp) } if (csp->action->flags & ACTION_REDIRECT) { - log_error(LOG_LEVEL_ERROR, "redirect{} overruled by block."); + log_error(LOG_LEVEL_ERROR, "redirect{} overruled by block."); } /* * Else, prepare a response @@ -667,7 +673,7 @@ struct http_response *block_url(struct client_state *csp) if(csp->action->flags & ACTION_HANDLE_AS_EMPTY_DOCUMENT) { /* - * Send empty document. + * Send empty document. */ new_content_type = csp->action->string[ACTION_STRING_CONTENT_TYPE]; @@ -713,27 +719,8 @@ struct http_response *block_url(struct client_state *csp) { jb_err err; struct map * exports; - char *p; - - /* - * Workaround for stupid Netscape bug which prevents - * pages from being displayed if loading a referenced - * JavaScript or style sheet fails. So make it appear - * as if it succeeded. - */ - if ( NULL != (p = get_header_value(csp->headers, "User-Agent:")) - && !strncmpic(p, "mozilla", 7) /* Catch Netscape but */ - && !strstr(p, "Gecko") /* save Mozilla, */ - && !strstr(p, "compatible") /* MSIE */ - && !strstr(p, "Opera")) /* and Opera. */ - { - rsp->status = strdup("200 Request for blocked URL"); - } - else - { - rsp->status = strdup("403 Request for blocked URL"); - } + rsp->status = strdup("403 Request blocked by Privoxy"); if (rsp->status == NULL) { free_http_response(rsp); @@ -795,7 +782,7 @@ struct http_response *block_url(struct client_state *csp) return cgi_error_memory(); } } - rsp->reason = RSP_REASON_BLOCKED; + rsp->crunch_reason = BLOCKED; return finish_http_response(csp, rsp); @@ -854,7 +841,7 @@ struct http_response *trust_url(struct client_state *csp) * Export the protocol, host, port, and referrer information */ err = map(exports, "hostport", 1, csp->http->hostport, 1); - if (!err) err = map(exports, "protocol", 1, csp->http->ssl ? "https://" : "http://", 1); + if (!err) err = map(exports, "protocol", 1, csp->http->ssl ? "https://" : "http://", 1); if (!err) err = map(exports, "path", 1, csp->http->path, 1); if (NULL != (p = get_header_value(csp->headers, "Referer:"))) @@ -955,7 +942,7 @@ struct http_response *trust_url(struct client_state *csp) free_http_response(rsp); return cgi_error_memory(); } - rsp->reason = RSP_REASON_UNTRUSTED; + rsp->crunch_reason = UNTRUSTED; return finish_http_response(csp, rsp); } @@ -974,7 +961,7 @@ struct http_response *trust_url(struct client_state *csp) * 2 : b = The filter list to compile * * Returns : NULL in case of errors, otherwise the - * pcrs job list. + * pcrs job list. * *********************************************************************/ pcrs_job *compile_dynamic_pcrs_job_list(const struct client_state *csp, const struct re_filterfile_spec *b) @@ -1001,10 +988,9 @@ pcrs_job *compile_dynamic_pcrs_job_list(const struct client_state *csp, const st dummy = pcrs_compile_dynamic_command(pattern->str, variables, &error); if (NULL == dummy) { - assert(error < 0); log_error(LOG_LEVEL_ERROR, - "Adding filter job \'%s\' to dynamic filter %s failed: %s", - pattern->str, b->name, pcrs_strerror(error)); + "Compiling dynamic pcrs job '%s' for '%s' failed with error code %d: %s", + pattern->str, b->name, error, pcrs_strerror(error)); continue; } else @@ -1044,7 +1030,7 @@ pcrs_job *compile_dynamic_pcrs_job_list(const struct client_state *csp, const st * 2 : pcrs_command = pcrs command formatted as string (s@foo@bar@) * * - * Returns : NULL if the pcrs_command didn't change the url, or + * Returns : NULL if the pcrs_command didn't change the url, or * the result of the modification. * *********************************************************************/ @@ -1103,7 +1089,7 @@ char *rewrite_url(char *old_url, const char *pcrs_command) * * Parameters : * 1 : subject = the string to check - * 2 : redirect_mode = +fast-redirect{} mode + * 2 : redirect_mode = +fast-redirect{} mode * * Returns : NULL if no URL was found, or * the last URL found. @@ -1126,7 +1112,54 @@ char *get_last_url(char *subject, const char *redirect_mode) if (0 == strcmpic(redirect_mode, "check-decoded-url")) { - log_error(LOG_LEVEL_REDIRECTS, "Decoding \"%s\" if necessary.", subject); + log_error(LOG_LEVEL_REDIRECTS, + "Checking \"%s\" for encoded redirects.", subject); + +#if defined(MUTEX_LOCKS_AVAILABLE) && defined(HAVE_STRTOK) + /* + * Check each parameter in the URL separately. + * Sectionize the URL at "?" and "&", + * then URL-decode each component, + * and look for a URL in the decoded result. + * Keep the last one we spot. + */ + char *found = NULL; + + privoxy_mutex_lock(&strtok_mutex); + char *token = strtok(subject, "?&"); + while (token) + { + char *dtoken = url_decode(token); + if (NULL == dtoken) + { + log_error(LOG_LEVEL_ERROR, "Unable to decode \"%s\".", token); + continue; + } + char *http_url = strstr(dtoken, "http://"); + char *https_url = strstr(dtoken, "https://"); + char *last_url = (http_url && https_url + ? (http_url < https_url ? http_url : https_url) + : (http_url ? http_url : https_url)); + if (last_url) + { + freez(found); + found = strdup(last_url); + if (found == NULL) + { + log_error(LOG_LEVEL_ERROR, + "Out of memory while searching for redirects."); + privoxy_mutex_unlock(&strtok_mutex); + return NULL; + } + } + freez(dtoken); + token = strtok(NULL, "?&"); + } + privoxy_mutex_unlock(&strtok_mutex); + freez(subject); + + return found; +#else new_url = url_decode(subject); if (new_url != NULL) { @@ -1137,9 +1170,13 @@ char *get_last_url(char *subject, const char *redirect_mode) { log_error(LOG_LEVEL_ERROR, "Unable to decode \"%s\".", subject); } +#endif /* defined(MUTEX_LOCKS_AVAILABLE) && defined(HAVE_STRTOK) */ } - log_error(LOG_LEVEL_REDIRECTS, "Checking \"%s\" for redirects.", subject); + /* Else, just look for a URL inside this one, without decoding anything. */ + + log_error(LOG_LEVEL_REDIRECTS, + "Checking \"%s\" for unencoded redirects.", subject); /* * Find the last URL encoded in the request @@ -1162,7 +1199,7 @@ char *get_last_url(char *subject, const char *redirect_mode) )) { /* - * Return new URL if we found a redirect + * Return new URL if we found a redirect * or if the subject already was a URL. * * The second case makes sure that we can @@ -1268,7 +1305,7 @@ struct http_response *redirect_url(struct client_state *csp) #endif /* def FEATURE_FAST_REDIRECTS */ csp->action->flags &= ~ACTION_REDIRECT; - /* Did any redirect action trigger? */ + /* Did any redirect action trigger? */ if (new_url) { if (0 == strcmpic(new_url, csp->http->url)) @@ -1288,14 +1325,14 @@ struct http_response *redirect_url(struct client_state *csp) return cgi_error_memory(); } - if ( enlist_unique_header(rsp->headers, "Location", new_url) - || (NULL == (rsp->status = strdup("302 Local Redirect from Privoxy"))) ) + if (enlist_unique_header(rsp->headers, "Location", new_url) + || (NULL == (rsp->status = strdup("302 Local Redirect from Privoxy")))) { freez(new_url); free_http_response(rsp); return cgi_error_memory(); } - rsp->reason = RSP_REASON_REDIRECTED; + rsp->crunch_reason = REDIRECTED; freez(new_url); return finish_http_response(csp, rsp); @@ -1522,7 +1559,7 @@ static char *pcrs_filter_response(struct client_state *csp) struct re_filterfile_spec *b; struct list_entry *filtername; - /* + /* * Sanity first */ if (csp->iob->cur >= csp->iob->eod) @@ -1738,48 +1775,10 @@ static char *gif_deanimate_response(struct client_state *csp) * NULL if no content filter is active * *********************************************************************/ -filter_function_ptr get_filter_function(struct client_state *csp) +static filter_function_ptr get_filter_function(const struct client_state *csp) { filter_function_ptr filter_function = NULL; - if ((csp->content_type & CT_TABOO) - && !(csp->action->flags & ACTION_FORCE_TEXT_MODE)) - { - return NULL; - } - - /* - * Are we enabling text mode by force? - */ - if (csp->action->flags & ACTION_FORCE_TEXT_MODE) - { - /* - * Do we really have to? - */ - if (csp->content_type & CT_TEXT) - { - log_error(LOG_LEVEL_HEADER, "Text mode is already enabled."); - } - else - { - csp->content_type |= CT_TEXT; - log_error(LOG_LEVEL_HEADER, "Text mode enabled by force. Take cover!"); - } - } - - if (!(csp->content_type & CT_DECLARED)) - { - /* - * The server didn't bother to declare a MIME-Type. - * Assume it's text that can be filtered. - * - * This also regulary happens with 304 responses, - * therefore logging anything here would cause - * too much noise. - */ - csp->content_type |= CT_TEXT; - } - /* * Choose the applying filter function based on * the content type and action settings. @@ -1842,8 +1841,14 @@ static jb_err remove_chunked_transfer_coding(char *buffer, size_t *size) if ((newsize += chunksize) >= *size) { + /* + * XXX: The message is a bit confusing. Isn't the real problem that + * the specified chunk size is greater than the number of bytes + * left in the buffer? This probably means the connection got + * closed prematurely. To be investigated after 3.0.17 is out. + */ log_error(LOG_LEVEL_ERROR, - "Chunk size %d exceeds buffer size %d in \"chunked\" transfer coding", + "Chunk size %d exceeds buffer size %d in \"chunked\" transfer coding", chunksize, *size); return JB_ERR_PARSE; } @@ -1859,7 +1864,7 @@ static jb_err remove_chunked_transfer_coding(char *buffer, size_t *size) break; } } - + /* XXX: Should get its own loglevel. */ log_error(LOG_LEVEL_RE_FILTER, "De-chunking successful. Shrunk from %d to %d", *size, newsize); @@ -1947,20 +1952,23 @@ static jb_err prepare_for_filtering(struct client_state *csp) /********************************************************************* * - * Function : execute_content_filter + * Function : execute_content_filters * * Description : Executes a given content filter. * * Parameters : * 1 : csp = Current client state (buffers, headers, etc...) - * 2 : content_filter = The filter function to execute * * Returns : Pointer to the modified buffer, or * NULL if filtering failed or wasn't necessary. * *********************************************************************/ -char *execute_content_filter(struct client_state *csp, filter_function_ptr content_filter) +char *execute_content_filters(struct client_state *csp) { + filter_function_ptr content_filter; + + assert(content_filters_enabled(csp->action)); + if (0 == csp->iob->eod - csp->iob->cur) { /* @@ -1986,6 +1994,8 @@ char *execute_content_filter(struct client_state *csp, filter_function_ptr conte return NULL; } + content_filter = get_filter_function(csp); + return ((*content_filter)(csp)); } @@ -2233,7 +2243,7 @@ const struct forward_spec *forward_url(struct client_state *csp, /********************************************************************* * - * Function : direct_response + * Function : direct_response * * Description : Check if Max-Forwards == 0 for an OPTIONS or TRACE * request and if so, return a HTTP 501 to the client. @@ -2242,7 +2252,7 @@ const struct forward_spec *forward_url(struct client_state *csp, * requests properly. Still, what we do here is rfc- * compliant, whereas ignoring or forwarding are not. * - * Parameters : + * Parameters : * 1 : csp = Current client state (buffers, headers, etc...) * * Returns : http_response if , NULL if nonmatch or handler fail @@ -2258,7 +2268,7 @@ struct http_response *direct_response(struct client_state *csp) { for (p = csp->headers->first; (p != NULL) ; p = p->next) { - if (!strncmpic("Max-Forwards:", p->str, 13)) + if (!strncmpic(p->str, "Max-Forwards:", 13)) { unsigned int max_forwards; @@ -2282,7 +2292,7 @@ struct http_response *direct_response(struct client_state *csp) { return cgi_error_memory(); } - + if (NULL == (rsp->status = strdup("501 Not Implemented"))) { free_http_response(rsp); @@ -2290,7 +2300,7 @@ struct http_response *direct_response(struct client_state *csp) } rsp->is_static = 1; - rsp->reason = RSP_REASON_UNSUPPORTED; + rsp->crunch_reason = UNSUPPORTED; return(finish_http_response(csp, rsp)); } @@ -2301,6 +2311,81 @@ struct http_response *direct_response(struct client_state *csp) } +/********************************************************************* + * + * Function : content_requires_filtering + * + * Description : Checks whether there are any content filters + * enabled for the current request and if they + * can actually be applied.. + * + * Parameters : + * 1 : csp = Current client state (buffers, headers, etc...) + * + * Returns : TRUE for yes, FALSE otherwise + * + *********************************************************************/ +int content_requires_filtering(struct client_state *csp) +{ + if ((csp->content_type & CT_TABOO) + && !(csp->action->flags & ACTION_FORCE_TEXT_MODE)) + { + return FALSE; + } + + /* + * Are we enabling text mode by force? + */ + if (csp->action->flags & ACTION_FORCE_TEXT_MODE) + { + /* + * Do we really have to? + */ + if (csp->content_type & CT_TEXT) + { + log_error(LOG_LEVEL_HEADER, "Text mode is already enabled."); + } + else + { + csp->content_type |= CT_TEXT; + log_error(LOG_LEVEL_HEADER, "Text mode enabled by force. Take cover!"); + } + } + + if (!(csp->content_type & CT_DECLARED)) + { + /* + * The server didn't bother to declare a MIME-Type. + * Assume it's text that can be filtered. + * + * This also regulary happens with 304 responses, + * therefore logging anything here would cause + * too much noise. + */ + csp->content_type |= CT_TEXT; + } + + /* + * Choose the applying filter function based on + * the content type and action settings. + */ + if ((csp->content_type & CT_TEXT) && + (csp->rlist != NULL) && + (!list_is_empty(csp->action->multi[ACTION_MULTI_FILTER]))) + { + return TRUE; + } + else if ((csp->content_type & CT_GIF) && + (csp->action->flags & ACTION_DEANIMATE)) + { + return TRUE; + } + + return FALSE; + +} + + /********************************************************************* * * Function : content_filters_enabled @@ -2308,7 +2393,7 @@ struct http_response *direct_response(struct client_state *csp) * Description : Checks whether there are any content filters * enabled for the current request. * - * Parameters : + * Parameters : * 1 : action = Action spec to check. * * Returns : TRUE for yes, FALSE otherwise