X-Git-Url: http://www.privoxy.org/gitweb/?a=blobdiff_plain;f=filters.c;h=a384e1b2c92a117627c6c36c0ec8761043cca701;hb=db1fb0ac271ade83ddcae260f5d5806e678f2b7f;hp=fb5de235c3652e44cc9bb10a9790e2f70c8a7e65;hpb=17733b62b31ff476531e054b6fcf553eb8fb107b;p=privoxy.git diff --git a/filters.c b/filters.c index fb5de235..a384e1b2 100644 --- a/filters.c +++ b/filters.c @@ -1,11 +1,11 @@ -const char filters_rcs[] = "$Id: filters.c,v 1.178 2013/11/24 14:22:51 fabiankeil Exp $"; +const char filters_rcs[] = "$Id: filters.c,v 1.185 2014/06/12 13:08:25 fabiankeil Exp $"; /********************************************************************* * * File : $Source: /cvsroot/ijbswa/current/filters.c,v $ * * Purpose : Declares functions to parse/crunch headers and pages. * - * Copyright : Written by and Copyright (C) 2001-2011 the + * Copyright : Written by and Copyright (C) 2001-2014 the * Privoxy team. http://www.privoxy.org/ * * Based on the Internet Junkbuster originally written @@ -65,6 +65,7 @@ const char filters_rcs[] = "$Id: filters.c,v 1.178 2013/11/24 14:22:51 fabiankei #include "miscutil.h" #include "actions.h" #include "cgi.h" +#include "jcc.h" #include "list.h" #include "deanimate.h" #include "urlmatch.h" @@ -1237,11 +1238,14 @@ struct http_response *redirect_url(struct client_state *csp) * properly formatted URL and use it for the redirection * directly. * - * According to RFC 2616 section 14.30 the URL - * has to be absolute and if the user tries: - * +redirect{shit/this/will/be/parsed/as/pcrs_command.html} + * According to (the now obsolete) RFC 2616 section 14.30 + * the URL has to be absolute and if the user tries: + * +redirect{sadly/this/will/be/parsed/as/pcrs_command.html} * she would get undefined results anyway. * + * RFC 7231 7.1.2 actually allows relative references, + * but those start with a leading slash (RFC 3986 4.2) and + * thus can't be mistaken for pcrs commands either. */ if (*redirection_string == 's') @@ -1638,80 +1642,80 @@ static char *pcrs_filter_response(struct client_state *csp) for (filtername = csp->action->multi[ACTION_MULTI_FILTER]->first; filtername != NULL; filtername = filtername->next) { + int current_hits = 0; /* Number of hits caused by this filter */ + int job_number = 0; /* Which job we're currently executing */ + int job_hits = 0; /* How many hits the current job caused */ + pcrs_job *joblist; + b = get_filter(csp, filtername->str, FT_CONTENT_FILTER); if (b == NULL) { continue; } - { - int current_hits = 0; /* Number of hits caused by this filter */ - int job_number = 0; /* Which job we're currently executing */ - int job_hits = 0; /* How many hits the current job caused */ - pcrs_job *joblist = b->joblist; - - if (b->dynamic) joblist = compile_dynamic_pcrs_job_list(csp, b); - - if (NULL == joblist) - { - log_error(LOG_LEVEL_RE_FILTER, "Filter %s has empty joblist. Nothing to do.", b->name); - continue; - } - prev_size = size; - /* Apply all jobs from the joblist */ - for (job = joblist; NULL != job; job = job->next) - { - job_number++; - job_hits = pcrs_execute(job, old, size, &new, &size); + joblist = b->joblist; - if (job_hits >= 0) - { - /* - * That went well. Continue filtering - * and use the result of this job as - * input for the next one. - */ - current_hits += job_hits; - if (old != csp->iob->cur) - { - freez(old); - } - old = new; - } - else - { - /* - * This job caused an unexpected error. Inform the user - * and skip the rest of the jobs in this filter. We could - * continue with the next job, but usually the jobs - * depend on each other or are similar enough to - * fail for the same reason. - * - * At the moment our pcrs expects the error codes of pcre 3.4, - * but newer pcre versions can return additional error codes. - * As a result pcrs_strerror()'s error message might be - * "Unknown error ...", therefore we print the numerical value - * as well. - * - * XXX: Is this important enough for LOG_LEVEL_ERROR or - * should we use LOG_LEVEL_RE_FILTER instead? - */ - log_error(LOG_LEVEL_ERROR, "Skipped filter \'%s\' after job number %u: %s (%d)", - b->name, job_number, pcrs_strerror(job_hits), job_hits); - break; - } - } + if (b->dynamic) joblist = compile_dynamic_pcrs_job_list(csp, b); - if (b->dynamic) pcrs_free_joblist(joblist); + if (NULL == joblist) + { + log_error(LOG_LEVEL_RE_FILTER, "Filter %s has empty joblist. Nothing to do.", b->name); + continue; + } - log_error(LOG_LEVEL_RE_FILTER, - "filtering %s%s (size %d) with \'%s\' produced %d hits (new size %d).", - csp->http->hostport, csp->http->path, prev_size, b->name, current_hits, size); + prev_size = size; + /* Apply all jobs from the joblist */ + for (job = joblist; NULL != job; job = job->next) + { + job_number++; + job_hits = pcrs_execute(job, old, size, &new, &size); - hits += current_hits; + if (job_hits >= 0) + { + /* + * That went well. Continue filtering + * and use the result of this job as + * input for the next one. + */ + current_hits += job_hits; + if (old != csp->iob->cur) + { + freez(old); + } + old = new; + } + else + { + /* + * This job caused an unexpected error. Inform the user + * and skip the rest of the jobs in this filter. We could + * continue with the next job, but usually the jobs + * depend on each other or are similar enough to + * fail for the same reason. + * + * At the moment our pcrs expects the error codes of pcre 3.4, + * but newer pcre versions can return additional error codes. + * As a result pcrs_strerror()'s error message might be + * "Unknown error ...", therefore we print the numerical value + * as well. + * + * XXX: Is this important enough for LOG_LEVEL_ERROR or + * should we use LOG_LEVEL_RE_FILTER instead? + */ + log_error(LOG_LEVEL_ERROR, "Skipped filter \'%s\' after job number %u: %s (%d)", + b->name, job_number, pcrs_strerror(job_hits), job_hits); + break; } } + if (b->dynamic) pcrs_free_joblist(joblist); + + log_error(LOG_LEVEL_RE_FILTER, + "filtering %s%s (size %d) with \'%s\' produced %d hits (new size %d).", + csp->http->hostport, csp->http->path, prev_size, b->name, current_hits, size); + + hits += current_hits; + } /* * If there were no hits, destroy our copy and let @@ -1732,6 +1736,229 @@ static char *pcrs_filter_response(struct client_state *csp) } +#ifdef FEATURE_EXTERNAL_FILTERS +/********************************************************************* + * + * Function : get_external_filter + * + * Description : Lookup the code to execute for an external filter. + * Masks the misuse of the re_filterfile_spec. + * + * Parameters : + * 1 : csp = Current client state (buffers, headers, etc...) + * 2 : name = Name of the content filter to get + * + * Returns : A pointer to the requested code + * or NULL if the filter wasn't found + * + *********************************************************************/ +static const char *get_external_filter(const struct client_state *csp, + const char *name) +{ + struct re_filterfile_spec *external_filter; + + external_filter = get_filter(csp, name, FT_EXTERNAL_CONTENT_FILTER); + if (external_filter == NULL) + { + log_error(LOG_LEVEL_FATAL, + "Didn't find stuff to execute for external filter: %s", + name); + } + + return external_filter->patterns->first->str; + +} + + +/********************************************************************* + * + * Function : set_privoxy_variables + * + * Description : Sets a couple of privoxy-specific environment variables + * + * Parameters : + * 1 : csp = Current client state (buffers, headers, etc...) + * + * Returns : N/A + * + *********************************************************************/ +static void set_privoxy_variables(const struct client_state *csp) +{ + int i; + struct { + const char *name; + const char *value; + } env[] = { + { "PRIVOXY_URL", csp->http->url }, + { "PRIVOXY_PATH", csp->http->path }, + { "PRIVOXY_HOST", csp->http->host }, + { "PRIVOXY_ORIGIN", csp->ip_addr_str }, + }; + + for (i = 0; i < SZ(env); i++) + { + if (setenv(env[i].name, env[i].value, 1)) + { + log_error(LOG_LEVEL_ERROR, "Failed to set %s=%s: %E", + env[i].name, env[i].value); + } + } +} + + +/********************************************************************* + * + * Function : execute_external_filter + * + * Description : Pipe content into external filter and return the output + * + * Parameters : + * 1 : csp = Current client state (buffers, headers, etc...) + * 2 : name = Name of the external filter to execute + * 3 : content = The original content to filter + * 4 : size = The size of the content buffer + * + * Returns : a pointer to the (newly allocated) modified buffer. + * or NULL if there were no hits or something went wrong + * + *********************************************************************/ +static char *execute_external_filter(const struct client_state *csp, + const char *name, char *content, size_t *size) +{ + char cmd[200]; + char file_name[FILENAME_MAX]; + FILE *fp; + char *filter_output; + int fd; + int ret; + size_t new_size; + const char *external_filter; + + if (csp->config->temporary_directory == NULL) + { + log_error(LOG_LEVEL_ERROR, + "No temporary-directory configured. Can't execute filter: %s", + name); + return NULL; + } + + external_filter = get_external_filter(csp, name); + + if (sizeof(file_name) < snprintf(file_name, sizeof(file_name), + "%s/privoxy-XXXXXXXX", csp->config->temporary_directory)) + { + log_error(LOG_LEVEL_ERROR, "temporary-directory path too long"); + return NULL; + } + + fd = mkstemp(file_name); + if (fd == -1) + { + log_error(LOG_LEVEL_ERROR, "mkstemp() failed to create %s: %E", file_name); + return NULL; + } + + fp = fdopen(fd, "w"); + if (fp == NULL) + { + log_error(LOG_LEVEL_ERROR, "fdopen() failed: %E"); + unlink(file_name); + return NULL; + } + + /* + * The size may be zero if a previous filter discarded everything. + * + * This isn't necessary unintentional, so we just don't try + * to fwrite() nothing and let the user deal with the rest. + */ + if ((*size != 0) && fwrite(content, *size, 1, fp) != 1) + { + log_error(LOG_LEVEL_ERROR, "fwrite(..., %d, 1, ..) failed: %E", *size); + unlink(file_name); + return NULL; + } + fclose(fp); + + if (sizeof(cmd) < snprintf(cmd, sizeof(cmd), "%s < %s", external_filter, file_name)) + { + log_error(LOG_LEVEL_ERROR, + "temporary-directory or external filter path too long"); + unlink(file_name); + return NULL; + } + + log_error(LOG_LEVEL_RE_FILTER, "Executing '%s': %s", name, cmd); + + /* + * The locking is necessary to prevent other threads + * from overwriting the environment variables before + * the popen fork. Afterwards this no longer matters. + */ + privoxy_mutex_lock(&external_filter_mutex); + set_privoxy_variables(csp); + fp = popen(cmd, "r"); + privoxy_mutex_unlock(&external_filter_mutex); + if (fp == NULL) + { + log_error(LOG_LEVEL_ERROR, "popen(\"%s\", \"r\") failed: %E", cmd); + unlink(file_name); + return NULL; + } + + filter_output = malloc_or_die(*size); + + new_size = 0; + while (!feof(fp) && !ferror(fp)) + { + size_t len; + /* Could be bigger ... */ + enum { READ_LENGTH = 2048 }; + + if (new_size + READ_LENGTH >= *size) + { + char *p; + + /* Could be considered wasteful if the content is 'large'. */ + *size = (*size != 0) ? *size * 2 : READ_LENGTH; + + p = realloc(filter_output, *size); + if (p == NULL) + { + log_error(LOG_LEVEL_ERROR, "Out of memory while reading " + "external filter output. Using what we got so far."); + break; + } + filter_output = p; + } + len = fread(&filter_output[new_size], 1, READ_LENGTH, fp); + if (len > 0) + { + new_size += len; + } + } + + ret = pclose(fp); + if (ret == -1) + { + log_error(LOG_LEVEL_ERROR, "Executing %s failed: %E", cmd); + } + else + { + log_error(LOG_LEVEL_RE_FILTER, + "Executing '%s' resulted in return value %d. " + "Read %d of up to %d bytes.", name, (ret >> 8), new_size, *size); + } + + unlink(file_name); + *size = new_size; + + return filter_output; + +} +#endif /* def FEATURE_EXTERNAL_FILTERS */ + + /********************************************************************* * * Function : gif_deanimate_response @@ -1798,7 +2025,8 @@ static char *gif_deanimate_response(struct client_state *csp) * Function : get_filter_function * * Description : Decides which content filter function has - * to be applied (if any). + * to be applied (if any). Only considers functions + * for internal filters which are mutually-exclusive. * * Parameters : * 1 : csp = Current client state (buffers, headers, etc...) @@ -1821,7 +2049,7 @@ static filter_function_ptr get_filter_function(const struct client_state *csp) { filter_function = pcrs_filter_response; } - else if ((csp->content_type & CT_GIF) && + else if ((csp->content_type & CT_GIF) && (csp->action->flags & ACTION_DEANIMATE)) { filter_function = gif_deanimate_response; @@ -1836,7 +2064,8 @@ static filter_function_ptr get_filter_function(const struct client_state *csp) * Function : remove_chunked_transfer_coding * * Description : In-situ remove the "chunked" transfer coding as defined - * in rfc2616 from a buffer. + * in RFC 7230 4.1 from a buffer. XXX: The implementation + * is neither complete nor compliant (TODO #129). * * Parameters : * 1 : buffer = Pointer to the text buffer @@ -1993,6 +2222,7 @@ static jb_err prepare_for_filtering(struct client_state *csp) *********************************************************************/ char *execute_content_filters(struct client_state *csp) { + char *content; filter_function_ptr content_filter; assert(content_filters_enabled(csp->action)); @@ -2023,8 +2253,34 @@ char *execute_content_filters(struct client_state *csp) } content_filter = get_filter_function(csp); + content = (content_filter != NULL) ? (*content_filter)(csp) : NULL; + +#ifdef FEATURE_EXTERNAL_FILTERS + if ((csp->content_type & CT_TEXT) && + (csp->rlist != NULL) && + !list_is_empty(csp->action->multi[ACTION_MULTI_EXTERNAL_FILTER])) + { + struct list_entry *filtername; + size_t size = (size_t)csp->content_length; + + if (content == NULL) + { + content = csp->iob->cur; + size = (size_t)(csp->iob->eod - csp->iob->cur); + } + + for (filtername = csp->action->multi[ACTION_MULTI_EXTERNAL_FILTER]->first; + filtername ; filtername = filtername->next) + { + content = execute_external_filter(csp, filtername->str, content, &size); + } + csp->flags |= CSP_FLAG_MODIFIED; + csp->content_length = size; + } +#endif /* def FEATURE_EXTERNAL_FILTERS */ + + return content; - return ((*content_filter)(csp)); } @@ -2404,7 +2660,8 @@ int content_requires_filtering(struct client_state *csp) */ if ((csp->content_type & CT_TEXT) && (csp->rlist != NULL) && - (!list_is_empty(csp->action->multi[ACTION_MULTI_FILTER]))) + (!list_is_empty(csp->action->multi[ACTION_MULTI_FILTER]) || + !list_is_empty(csp->action->multi[ACTION_MULTI_EXTERNAL_FILTER]))) { return TRUE; } @@ -2435,7 +2692,8 @@ int content_requires_filtering(struct client_state *csp) int content_filters_enabled(const struct current_action_spec *action) { return ((action->flags & ACTION_DEANIMATE) || - !list_is_empty(action->multi[ACTION_MULTI_FILTER])); + !list_is_empty(action->multi[ACTION_MULTI_FILTER]) || + !list_is_empty(action->multi[ACTION_MULTI_EXTERNAL_FILTER])); }