X-Git-Url: http://www.privoxy.org/gitweb/?p=privoxy.git;a=blobdiff_plain;f=filters.c;h=292db4bcceb0c546c5aec930198b63321e46bbe8;hp=d56bf1fef508fa443c2be8ece383c9c66ad8dcea;hb=0e2be80c3229e207936c707d42630914eef3caaa;hpb=e10b140e837860410ab2b97562b0d919beea4df0 diff --git a/filters.c b/filters.c index d56bf1fe..292db4bc 100644 --- a/filters.c +++ b/filters.c @@ -1,4 +1,4 @@ -const char filters_rcs[] = "$Id: filters.c,v 1.90 2007/09/02 12:44:17 fabiankeil Exp $"; +const char filters_rcs[] = "$Id: filters.c,v 1.96 2007/10/19 16:53:28 fabiankeil Exp $"; /********************************************************************* * * File : $Source: /cvsroot/ijbswa/current/filters.c,v $ @@ -40,6 +40,34 @@ const char filters_rcs[] = "$Id: filters.c,v 1.90 2007/09/02 12:44:17 fabiankeil * * Revisions : * $Log: filters.c,v $ + * Revision 1.96 2007/10/19 16:53:28 fabiankeil + * Add helper function to check if any content filters are enabled. + * + * Revision 1.95 2007/10/17 19:31:20 fabiankeil + * Omitting the zero chunk that ends the chunk transfer encoding seems + * to be the new black. Log the problem and continue filtering anyway. + * + * Revision 1.94 2007/09/29 13:20:20 fabiankeil + * Remove two redundant and one useless log messages. + * + * Revision 1.93 2007/09/29 10:21:16 fabiankeil + * - Move get_filter_function() from jcc.c to filters.c + * so the filter functions can be static. + * - Don't bother filtering body-less responses. + * + * Revision 1.92 2007/09/28 16:38:55 fabiankeil + * - Execute content filters through execute_content_filter(). + * - Add prepare_for_filtering() so filter functions don't have to + * care about de-chunking and decompression. As a side effect this enables + * decompression for gif_deanimate_response() and jpeg_inspect_response(). + * - Change remove_chunked_transfer_coding()'s return type to jb_err. + * Some clowns feel like chunking empty responses in which case + * (size == 0) is valid but previously would be interpreted as error. + * + * Revision 1.91 2007/09/02 15:31:20 fabiankeil + * Move match_portlist() from filter.c to urlmatch.c. + * It's used for url matching, not for filtering. + * * Revision 1.90 2007/09/02 12:44:17 fabiankeil * Remove newline at the end of a log_error() message. * @@ -618,6 +646,8 @@ const char filters_h_rcs[] = FILTERS_H_VERSION; */ #define ijb_isdigit(__X) isdigit((int)(unsigned char)(__X)) +static jb_err remove_chunked_transfer_coding(char *buffer, size_t *size); +static jb_err prepare_for_filtering(struct client_state *csp); #ifdef FEATURE_ACL /********************************************************************* @@ -719,7 +749,7 @@ int acl_addr(const char *aspec, struct access_control_addr *aca) *p++ = '\0'; if (ijb_isdigit(*p) == 0) { - free(acl_spec); + freez(acl_spec); return(-1); } masklength = atoi(p); @@ -727,7 +757,7 @@ int acl_addr(const char *aspec, struct access_control_addr *aca) if ((masklength < 0) || (masklength > 32)) { - free(acl_spec); + freez(acl_spec); return(-1); } @@ -740,7 +770,7 @@ int acl_addr(const char *aspec, struct access_control_addr *aca) if (port <= 0 || port > 65535 || *endptr != '\0') { - free(acl_spec); + freez(acl_spec); return(-1); } } @@ -748,7 +778,7 @@ int acl_addr(const char *aspec, struct access_control_addr *aca) aca->port = (unsigned long)port; aca->addr = ntohl(resolve_hostname_to_ip(acl_spec)); - free(acl_spec); + freez(acl_spec); if (aca->addr == INADDR_NONE) { @@ -1695,7 +1725,7 @@ int is_untrusted_url(const struct client_state *csp) log_error(LOG_LEVEL_ERROR, "Failed to append \'%s\' to trustfile \'%s\': %E", new_entry, csp->config->trustfile); } - free(new_entry); + freez(new_entry); } else { @@ -1724,15 +1754,8 @@ int is_untrusted_url(const struct client_state *csp) * Function : pcrs_filter_response * * Description : Execute all text substitutions from all applying - * +filter actions on the text buffer that's been accumulated - * in csp->iob->buf. If this changes the contents, set - * csp->content_length to the modified size and raise the - * CSP_FLAG_MODIFIED flag. - * - * XXX: Currently pcrs_filter_response is also responsible - * for dechunking and decompressing. Both should be - * done in separate functions so other content modifiers - * profit as well, even if pcrs filtering is disabled. + * +filter actions on the text buffer that's been + * accumulated in csp->iob->buf. * * Parameters : * 1 : csp = Current client state (buffers, headers, etc...) @@ -1741,12 +1764,13 @@ int is_untrusted_url(const struct client_state *csp) * or NULL if there were no hits or something went wrong * *********************************************************************/ -char *pcrs_filter_response(struct client_state *csp) +static char *pcrs_filter_response(struct client_state *csp) { int hits=0; size_t size, prev_size; - char *old = csp->iob->cur, *new = NULL; + char *old = NULL; + char *new = NULL; pcrs_job *job; struct file_list *fl; @@ -1762,7 +1786,6 @@ char *pcrs_filter_response(struct client_state *csp) { return(NULL); } - size = (size_t)(csp->iob->eod - csp->iob->cur); /* * Need to check the set of re_filterfiles... @@ -1786,59 +1809,8 @@ char *pcrs_filter_response(struct client_state *csp) return(NULL); } - /* - * If the body has a "chunked" transfer-encoding, - * get rid of it first, adjusting size and iob->eod - */ - if (csp->flags & CSP_FLAG_CHUNKED) - { - log_error(LOG_LEVEL_RE_FILTER, "Need to de-chunk first"); - if (0 == (size = remove_chunked_transfer_coding(csp->iob->cur, size))) - { - return(NULL); - } - csp->iob->eod = csp->iob->cur + size; - csp->flags |= CSP_FLAG_MODIFIED; - } - -#ifdef FEATURE_ZLIB - /* - * If the body has a compressed transfer-encoding, - * uncompress it first, adjusting size and iob->eod. - * Note that decompression occurs after de-chunking. - */ - if (csp->content_type & (CT_GZIP | CT_DEFLATE)) - { - /* Notice that we at least tried to decompress. */ - if (JB_ERR_OK != decompress_iob(csp)) - { - /* - * We failed to decompress the data; there's no point - * in continuing since we can't filter. - * - * XXX: Actually the Accept-Encoding header may - * just be incorrect in which case we could continue - * with filtering. - * - * Unset CT_GZIP and CT_DEFLATE to remember not - * to modify the Content-Encoding header later. - */ - csp->content_type &= ~CT_GZIP; - csp->content_type &= ~CT_DEFLATE; - return(NULL); - } - - /* - * Decompression gives us a completely new iob, - * so we need to update. - */ - size = (size_t)(csp->iob->eod - csp->iob->cur); - old = csp->iob->cur; - - csp->flags |= CSP_FLAG_MODIFIED; - csp->content_type &= ~CT_TABOO; - } -#endif + size = (size_t)(csp->iob->eod - csp->iob->cur); + old = csp->iob->cur; for (i = 0; i < MAX_AF_FILES; i++) { @@ -1904,7 +1876,7 @@ char *pcrs_filter_response(struct client_state *csp) current_hits += job_hits; if (old != csp->iob->cur) { - free(old); + freez(old); } old = new; } @@ -1950,7 +1922,7 @@ char *pcrs_filter_response(struct client_state *csp) */ if (!hits) { - free(new); + freez(new); return(NULL); } @@ -1978,7 +1950,7 @@ char *pcrs_filter_response(struct client_state *csp) * or NULL in case something went wrong. * *********************************************************************/ -char *gif_deanimate_response(struct client_state *csp) +static char *gif_deanimate_response(struct client_state *csp) { struct binbuffer *in, *out; char *p; @@ -1986,21 +1958,6 @@ char *gif_deanimate_response(struct client_state *csp) size = (size_t)(csp->iob->eod - csp->iob->cur); - /* - * If the body has a "chunked" transfer-encoding, - * get rid of it first, adjusting size and iob->eod - */ - if (csp->flags & CSP_FLAG_CHUNKED) - { - log_error(LOG_LEVEL_DEANIMATE, "Need to de-chunk first"); - if (0 == (size = remove_chunked_transfer_coding(csp->iob->cur, size))) - { - return(NULL); - } - csp->iob->eod = csp->iob->cur + size; - csp->flags |= CSP_FLAG_MODIFIED; - } - if ( (NULL == (in = (struct binbuffer *)zalloc(sizeof *in ))) || (NULL == (out = (struct binbuffer *)zalloc(sizeof *out))) ) { @@ -2014,7 +1971,7 @@ char *gif_deanimate_response(struct client_state *csp) if (gif_deanimate(in, out, strncmp("last", csp->action->string[ACTION_STRING_DEANIMATE], 4))) { log_error(LOG_LEVEL_DEANIMATE, "failed! (gif parsing)"); - free(in); + freez(in); buf_free(out); return(NULL); } @@ -2031,8 +1988,8 @@ char *gif_deanimate_response(struct client_state *csp) csp->content_length = out->offset; csp->flags |= CSP_FLAG_MODIFIED; p = out->buffer; - free(in); - free(out); + freez(in); + freez(out); return(p); } @@ -2052,7 +2009,7 @@ char *gif_deanimate_response(struct client_state *csp) * or NULL in case something went wrong. * *********************************************************************/ -char *jpeg_inspect_response(struct client_state *csp) +static char *jpeg_inspect_response(struct client_state *csp) { struct binbuffer *in = NULL; struct binbuffer *out = NULL; @@ -2061,21 +2018,6 @@ char *jpeg_inspect_response(struct client_state *csp) size = (size_t)(csp->iob->eod - csp->iob->cur); - /* - * If the body has a "chunked" transfer-encoding, - * get rid of it first, adjusting size and iob->eod - */ - if (csp->flags & CSP_FLAG_CHUNKED) - { - log_error(LOG_LEVEL_DEANIMATE, "Need to de-chunk first"); - if (0 == (size = remove_chunked_transfer_coding(csp->iob->cur, size))) - { - return(NULL); - } - csp->iob->eod = csp->iob->cur + size; - csp->flags |= CSP_FLAG_MODIFIED; - } - if (NULL == (in = (struct binbuffer *)zalloc(sizeof *in ))) { log_error(LOG_LEVEL_DEANIMATE, "failed! (jpeg no mem 1)"); @@ -2098,7 +2040,7 @@ char *jpeg_inspect_response(struct client_state *csp) if (jpeg_inspect(in, out)) { log_error(LOG_LEVEL_DEANIMATE, "failed! (jpeg parsing)"); - free(in); + freez(in); buf_free(out); return(NULL); @@ -2108,14 +2050,95 @@ char *jpeg_inspect_response(struct client_state *csp) csp->content_length = out->offset; csp->flags |= CSP_FLAG_MODIFIED; p = out->buffer; - free(in); - free(out); + freez(in); + freez(out); return(p); } } +/********************************************************************* + * + * Function : get_filter_function + * + * Description : Decides which content filter function has + * to be applied (if any). + * + * XXX: Doesn't handle filter_popups() + * because of the different prototype. Probably + * we should ditch filter_popups() anyway, it's + * even less reliable than popup blocking based + * on pcrs filters. + * + * Parameters : + * 1 : csp = Current client state (buffers, headers, etc...) + * + * Returns : The content filter function to run, or + * NULL if no content filter is active + * + *********************************************************************/ +filter_function_ptr get_filter_function(struct client_state *csp) +{ + filter_function_ptr filter_function = NULL; + + /* + * Are we enabling text mode by force? + */ + if (csp->action->flags & ACTION_FORCE_TEXT_MODE) + { + /* + * Do we really have to? + */ + if (csp->content_type & CT_TEXT) + { + log_error(LOG_LEVEL_HEADER, "Text mode is already enabled."); + } + else + { + csp->content_type |= CT_TEXT; + log_error(LOG_LEVEL_HEADER, "Text mode enabled by force. Take cover!"); + } + } + + if (!(csp->content_type & CT_DECLARED)) + { + /* + * The server didn't bother to declare a MIME-Type. + * Assume it's text that can be filtered. + * + * This also regulary happens with 304 responses, + * therefore logging anything here would cause + * too much noise. + */ + csp->content_type |= CT_TEXT; + } + + /* + * Choose the applying filter function based on + * the content type and action settings. + */ + if ((csp->content_type & CT_TEXT) && + (csp->rlist != NULL) && + (!list_is_empty(csp->action->multi[ACTION_MULTI_FILTER]))) + { + filter_function = pcrs_filter_response; + } + else if ((csp->content_type & CT_GIF) && + (csp->action->flags & ACTION_DEANIMATE)) + { + filter_function = gif_deanimate_response; + } + else if ((csp->content_type & CT_JPEG) && + (csp->action->flags & ACTION_JPEG_INSPECT)) + { + filter_function = jpeg_inspect_response; + } + + return filter_function; +} + + /********************************************************************* * * Function : remove_chunked_transfer_coding @@ -2125,14 +2148,15 @@ char *jpeg_inspect_response(struct client_state *csp) * * Parameters : * 1 : buffer = Pointer to the text buffer - * 2 : size = Number of bytes to be processed + * 2 : size = In: Number of bytes to be processed, + * Out: Number of bytes after de-chunking. + * (undefined in case of errors) * - * Returns : The new size, i.e. the number of bytes from buffer which - * are occupied by the stripped body, or 0 in case something - * went wrong + * Returns : JB_ERR_OK for success, + * JB_ERR_PARSE otherwise * *********************************************************************/ -size_t remove_chunked_transfer_coding(char *buffer, const size_t size) +static jb_err remove_chunked_transfer_coding(char *buffer, size_t *size) { size_t newsize = 0; unsigned int chunksize = 0; @@ -2144,7 +2168,7 @@ size_t remove_chunked_transfer_coding(char *buffer, const size_t size) if (sscanf(buffer, "%x", &chunksize) != 1) { log_error(LOG_LEVEL_ERROR, "Invalid first chunksize while stripping \"chunked\" transfer coding"); - return(0); + return JB_ERR_PARSE; } while (chunksize > 0) @@ -2152,13 +2176,13 @@ size_t remove_chunked_transfer_coding(char *buffer, const size_t size) if (NULL == (from_p = strstr(from_p, "\r\n"))) { log_error(LOG_LEVEL_ERROR, "Parse error while stripping \"chunked\" transfer coding"); - return(0); + return JB_ERR_PARSE; } - if ((newsize += chunksize) >= size) + if ((newsize += chunksize) >= *size) { log_error(LOG_LEVEL_ERROR, "Chunksize exceeds buffer in \"chunked\" transfer coding"); - return(0); + return JB_ERR_PARSE; } from_p += 2; @@ -2168,18 +2192,141 @@ size_t remove_chunked_transfer_coding(char *buffer, const size_t size) if (sscanf(from_p, "%x", &chunksize) != 1) { - log_error(LOG_LEVEL_ERROR, "Parse error while stripping \"chunked\" transfer coding"); - return(0); + log_error(LOG_LEVEL_INFO, "Invalid \"chunked\" transfer encoding detected and ignored."); + break; } } + + /* XXX: Should get its own loglevel. */ + log_error(LOG_LEVEL_RE_FILTER, "De-chunking successful. Shrunk from %d to %d", *size, newsize); - /* FIXME: Should this get its own loglevel? */ - log_error(LOG_LEVEL_RE_FILTER, "De-chunking successful. Shrunk from %d to %d", size, newsize); - return(newsize); + *size = newsize; + + return JB_ERR_OK; } +/********************************************************************* + * + * Function : prepare_for_filtering + * + * Description : If necessary, de-chunks and decompresses + * the content so it can get filterd. + * + * Parameters : + * 1 : csp = Current client state (buffers, headers, etc...) + * + * Returns : JB_ERR_OK for success, + * JB_ERR_PARSE otherwise + * + *********************************************************************/ +static jb_err prepare_for_filtering(struct client_state *csp) +{ + jb_err err = JB_ERR_OK; + + /* + * If the body has a "chunked" transfer-encoding, + * get rid of it, adjusting size and iob->eod + */ + if (csp->flags & CSP_FLAG_CHUNKED) + { + size_t size = (size_t)(csp->iob->eod - csp->iob->cur); + + log_error(LOG_LEVEL_RE_FILTER, "Need to de-chunk first"); + err = remove_chunked_transfer_coding(csp->iob->cur, &size); + if (JB_ERR_OK == err) + { + csp->iob->eod = csp->iob->cur + size; + csp->flags |= CSP_FLAG_MODIFIED; + } + else + { + return JB_ERR_PARSE; + } + } + +#ifdef FEATURE_ZLIB + /* + * If the body has a supported transfer-encoding, + * decompress it, adjusting size and iob->eod. + */ + if (csp->content_type & (CT_GZIP|CT_DEFLATE)) + { + if (0 == csp->iob->eod - csp->iob->cur) + { + /* Nothing left after de-chunking. */ + return JB_ERR_OK; + } + + err = decompress_iob(csp); + + if (JB_ERR_OK == err) + { + csp->flags |= CSP_FLAG_MODIFIED; + csp->content_type &= ~CT_TABOO; + } + else + { + /* + * Unset CT_GZIP and CT_DEFLATE to remember not + * to modify the Content-Encoding header later. + */ + csp->content_type &= ~CT_GZIP; + csp->content_type &= ~CT_DEFLATE; + } + } +#endif + + return err; +} + + +/********************************************************************* + * + * Function : execute_content_filter + * + * Description : Executes a given content filter. + * + * Parameters : + * 1 : csp = Current client state (buffers, headers, etc...) + * 2 : content_filter = The filter function to execute + * + * Returns : Pointer to the modified buffer, or + * NULL if filtering failed or wasn't necessary. + * + *********************************************************************/ +char *execute_content_filter(struct client_state *csp, filter_function_ptr content_filter) +{ + if (0 == csp->iob->eod - csp->iob->cur) + { + /* + * No content (probably status code 301, 302 ...), + * no filtering necessary. + */ + return NULL; + } + + if (JB_ERR_OK != prepare_for_filtering(csp)) + { + /* + * failed to de-chunk or decompress. + */ + return NULL; + } + + if (0 == csp->iob->eod - csp->iob->cur) + { + /* + * Clown alarm: chunked and/or compressed nothing delivered. + */ + return NULL; + } + + return ((*content_filter)(csp)); +} + + /********************************************************************* * * Function : url_actions @@ -2506,6 +2653,26 @@ struct http_response *direct_response(struct client_state *csp) } +/********************************************************************* + * + * Function : content_filters_enabled + * + * Description : Checks whether there are any content filters + * enabled for the current request. + * + * Parameters : + * 1 : csp = Current client state (buffers, headers, etc...) + * + * Returns : TRUE for yes, FALSE otherwise + * + *********************************************************************/ +inline int content_filters_enabled(const struct client_state *csp) +{ + return (((csp->rlist != NULL) && + (!list_is_empty(csp->action->multi[ACTION_MULTI_FILTER]))) || + (csp->action->flags & (ACTION_DEANIMATE|ACTION_JPEG_INSPECT|ACTION_NO_POPUPS))); +} + /* Local Variables: tab-width: 3