-const char filters_rcs[] = "$Id: filters.c,v 1.178 2013/11/24 14:22:51 fabiankeil Exp $";
+const char filters_rcs[] = "$Id: filters.c,v 1.188 2014/10/18 11:25:57 fabiankeil Exp $";
/*********************************************************************
*
* File : $Source: /cvsroot/ijbswa/current/filters.c,v $
*
* Purpose : Declares functions to parse/crunch headers and pages.
*
- * Copyright : Written by and Copyright (C) 2001-2011 the
+ * Copyright : Written by and Copyright (C) 2001-2014 the
* Privoxy team. http://www.privoxy.org/
*
* Based on the Internet Junkbuster originally written
#include "miscutil.h"
#include "actions.h"
#include "cgi.h"
+#include "jcc.h"
#include "list.h"
#include "deanimate.h"
#include "urlmatch.h"
* properly formatted URL and use it for the redirection
* directly.
*
- * According to RFC 2616 section 14.30 the URL
- * has to be absolute and if the user tries:
- * +redirect{shit/this/will/be/parsed/as/pcrs_command.html}
+ * According to (the now obsolete) RFC 2616 section 14.30
+ * the URL has to be absolute and if the user tries:
+ * +redirect{sadly/this/will/be/parsed/as/pcrs_command.html}
* she would get undefined results anyway.
*
+ * RFC 7231 7.1.2 actually allows relative references,
+ * but those start with a leading slash (RFC 3986 4.2) and
+ * thus can't be mistaken for pcrs commands either.
*/
if (*redirection_string == 's')
for (filtername = csp->action->multi[ACTION_MULTI_FILTER]->first;
filtername != NULL; filtername = filtername->next)
{
+ int current_hits = 0; /* Number of hits caused by this filter */
+ int job_number = 0; /* Which job we're currently executing */
+ int job_hits = 0; /* How many hits the current job caused */
+ pcrs_job *joblist;
+
b = get_filter(csp, filtername->str, FT_CONTENT_FILTER);
if (b == NULL)
{
continue;
}
- {
- int current_hits = 0; /* Number of hits caused by this filter */
- int job_number = 0; /* Which job we're currently executing */
- int job_hits = 0; /* How many hits the current job caused */
- pcrs_job *joblist = b->joblist;
-
- if (b->dynamic) joblist = compile_dynamic_pcrs_job_list(csp, b);
- if (NULL == joblist)
- {
- log_error(LOG_LEVEL_RE_FILTER, "Filter %s has empty joblist. Nothing to do.", b->name);
- continue;
- }
+ joblist = b->joblist;
- prev_size = size;
- /* Apply all jobs from the joblist */
- for (job = joblist; NULL != job; job = job->next)
- {
- job_number++;
- job_hits = pcrs_execute(job, old, size, &new, &size);
+ if (b->dynamic) joblist = compile_dynamic_pcrs_job_list(csp, b);
- if (job_hits >= 0)
- {
- /*
- * That went well. Continue filtering
- * and use the result of this job as
- * input for the next one.
- */
- current_hits += job_hits;
- if (old != csp->iob->cur)
- {
- freez(old);
- }
- old = new;
- }
- else
- {
- /*
- * This job caused an unexpected error. Inform the user
- * and skip the rest of the jobs in this filter. We could
- * continue with the next job, but usually the jobs
- * depend on each other or are similar enough to
- * fail for the same reason.
- *
- * At the moment our pcrs expects the error codes of pcre 3.4,
- * but newer pcre versions can return additional error codes.
- * As a result pcrs_strerror()'s error message might be
- * "Unknown error ...", therefore we print the numerical value
- * as well.
- *
- * XXX: Is this important enough for LOG_LEVEL_ERROR or
- * should we use LOG_LEVEL_RE_FILTER instead?
- */
- log_error(LOG_LEVEL_ERROR, "Skipped filter \'%s\' after job number %u: %s (%d)",
- b->name, job_number, pcrs_strerror(job_hits), job_hits);
- break;
- }
- }
-
- if (b->dynamic) pcrs_free_joblist(joblist);
+ if (NULL == joblist)
+ {
+ log_error(LOG_LEVEL_RE_FILTER, "Filter %s has empty joblist. Nothing to do.", b->name);
+ continue;
+ }
- log_error(LOG_LEVEL_RE_FILTER,
- "filtering %s%s (size %d) with \'%s\' produced %d hits (new size %d).",
- csp->http->hostport, csp->http->path, prev_size, b->name, current_hits, size);
+ prev_size = size;
+ /* Apply all jobs from the joblist */
+ for (job = joblist; NULL != job; job = job->next)
+ {
+ job_number++;
+ job_hits = pcrs_execute(job, old, size, &new, &size);
- hits += current_hits;
+ if (job_hits >= 0)
+ {
+ /*
+ * That went well. Continue filtering
+ * and use the result of this job as
+ * input for the next one.
+ */
+ current_hits += job_hits;
+ if (old != csp->iob->cur)
+ {
+ freez(old);
+ }
+ old = new;
+ }
+ else
+ {
+ /*
+ * This job caused an unexpected error. Inform the user
+ * and skip the rest of the jobs in this filter. We could
+ * continue with the next job, but usually the jobs
+ * depend on each other or are similar enough to
+ * fail for the same reason.
+ *
+ * At the moment our pcrs expects the error codes of pcre 3.4,
+ * but newer pcre versions can return additional error codes.
+ * As a result pcrs_strerror()'s error message might be
+ * "Unknown error ...", therefore we print the numerical value
+ * as well.
+ *
+ * XXX: Is this important enough for LOG_LEVEL_ERROR or
+ * should we use LOG_LEVEL_RE_FILTER instead?
+ */
+ log_error(LOG_LEVEL_ERROR, "Skipped filter \'%s\' after job number %u: %s (%d)",
+ b->name, job_number, pcrs_strerror(job_hits), job_hits);
+ break;
}
}
+ if (b->dynamic) pcrs_free_joblist(joblist);
+
+ log_error(LOG_LEVEL_RE_FILTER,
+ "filtering %s%s (size %d) with \'%s\' produced %d hits (new size %d).",
+ csp->http->hostport, csp->http->path, prev_size, b->name, current_hits, size);
+
+ hits += current_hits;
+ }
/*
* If there were no hits, destroy our copy and let
}
+#ifdef FEATURE_EXTERNAL_FILTERS
+/*********************************************************************
+ *
+ * Function    :  get_external_filter
+ *
+ * Description :  Lookup the code to execute for an external filter.
+ *                Masks the misuse of the re_filterfile_spec: the
+ *                code is taken from the first entry of the spec's
+ *                pattern list.
+ *
+ * Parameters  :
+ *          1  :  csp = Current client state (buffers, headers, etc...)
+ *          2  :  name = Name of the content filter to get
+ *
+ * Returns     :  A pointer to the requested code
+ *                or NULL if the filter wasn't found (which is
+ *                additionally reported with LOG_LEVEL_FATAL).
+ *
+ *********************************************************************/
+static const char *get_external_filter(const struct client_state *csp,
+                                       const char *name)
+{
+   struct re_filterfile_spec *external_filter;
+
+   external_filter = get_filter(csp, name, FT_EXTERNAL_CONTENT_FILTER);
+   if (external_filter == NULL)
+   {
+      log_error(LOG_LEVEL_FATAL,
+         "Didn't find stuff to execute for external filter: %s",
+         name);
+      /*
+       * Make sure the NULL pointer is never dereferenced below,
+       * even if log_error() should return to us.
+       */
+      return NULL;
+   }
+
+   return external_filter->patterns->first->str;
+
+}
+
+
+/*********************************************************************
+ *
+ * Function    :  set_privoxy_variables
+ *
+ * Description :  Exports details of the current request as
+ *                PRIVOXY_* environment variables (URL, path, host
+ *                and client address). Existing values are
+ *                overwritten. A variable that can't be set is
+ *                logged and skipped, the remaining ones are still
+ *                attempted.
+ *
+ * Parameters  :
+ *          1  :  csp = Current client state (buffers, headers, etc...)
+ *
+ * Returns     :  N/A
+ *
+ *********************************************************************/
+static void set_privoxy_variables(const struct client_state *csp)
+{
+   /* Variable names and the request data they are filled with. */
+   struct {
+      const char *name;
+      const char *value;
+   } variables[] = {
+      { "PRIVOXY_URL",    csp->http->url   },
+      { "PRIVOXY_PATH",   csp->http->path  },
+      { "PRIVOXY_HOST",   csp->http->host  },
+      { "PRIVOXY_ORIGIN", csp->ip_addr_str },
+   };
+   int i = 0;
+
+   while (i < SZ(variables))
+   {
+      const char *variable_name = variables[i].name;
+      const char *variable_value = variables[i].value;
+
+      /* The final argument asks setenv() to replace existing values. */
+      if (setenv(variable_name, variable_value, 1) != 0)
+      {
+         log_error(LOG_LEVEL_ERROR, "Failed to set %s=%s: %E",
+            variable_name, variable_value);
+      }
+      i++;
+   }
+}
+
+
+/*********************************************************************
+ *
+ * Function    :  execute_external_filter
+ *
+ * Description :  Pipe content into external filter and return the output.
+ *                The content is written to a temporary file below the
+ *                configured temporary-directory, the filter command is
+ *                started through popen() with that file redirected to
+ *                its stdin and everything the command writes to its
+ *                stdout is collected.
+ *
+ * Parameters  :
+ *          1  :  csp = Current client state (buffers, headers, etc...)
+ *          2  :  name = Name of the external filter to execute
+ *          3  :  content = The original content to filter
+ *          4  :  size = In: the size of the content buffer.
+ *                       Out: the number of bytes read from the filter.
+ *                       Left untouched if NULL is returned.
+ *
+ * Returns     :  A pointer to the (newly allocated) modified buffer
+ *                which is not NUL-terminated, or NULL if something
+ *                went wrong before the filter output could be read.
+ *
+ *********************************************************************/
+static char *execute_external_filter(const struct client_state *csp,
+   const char *name, char *content, size_t *size)
+{
+   /* Number of bytes requested per fread() call. Could be bigger ... */
+   enum { READ_LENGTH = 2048 };
+   char cmd[200];
+   char file_name[FILENAME_MAX];
+   FILE *fp;
+   char *filter_output;
+   int fd;
+   int ret;
+   int length;
+   size_t new_size;
+   size_t capacity;
+   const char *external_filter;
+
+   if (csp->config->temporary_directory == NULL)
+   {
+      log_error(LOG_LEVEL_ERROR,
+         "No temporary-directory configured. Can't execute filter: %s",
+         name);
+      return NULL;
+   }
+
+   external_filter = get_external_filter(csp, name);
+   if (external_filter == NULL)
+   {
+      /* Without a command there is nothing to execute. */
+      return NULL;
+   }
+
+   /*
+    * snprintf() returns the length the complete string would have
+    * had, so a value equal to the buffer size already means that
+    * the output was truncated. Negative values indicate errors.
+    */
+   length = snprintf(file_name, sizeof(file_name),
+      "%s/privoxy-XXXXXXXX", csp->config->temporary_directory);
+   if (length < 0 || (size_t)length >= sizeof(file_name))
+   {
+      log_error(LOG_LEVEL_ERROR, "temporary-directory path too long");
+      return NULL;
+   }
+
+   fd = mkstemp(file_name);
+   if (fd == -1)
+   {
+      log_error(LOG_LEVEL_ERROR, "mkstemp() failed to create %s: %E", file_name);
+      return NULL;
+   }
+
+   fp = fdopen(fd, "w");
+   if (fp == NULL)
+   {
+      log_error(LOG_LEVEL_ERROR, "fdopen() failed: %E");
+      /* No stream owns the descriptor, so it has to be closed directly. */
+      close(fd);
+      unlink(file_name);
+      return NULL;
+   }
+
+   /*
+    * The size may be zero if a previous filter discarded everything.
+    *
+    * This isn't necessarily unintentional, so we just don't try
+    * to fwrite() nothing and let the user deal with the rest.
+    */
+   if ((*size != 0) && fwrite(content, *size, 1, fp) != 1)
+   {
+      log_error(LOG_LEVEL_ERROR, "fwrite(..., %d, 1, ..) failed: %E", (int)*size);
+      fclose(fp);
+      unlink(file_name);
+      return NULL;
+   }
+
+   /*
+    * Buffered data may only hit the file when the stream is closed,
+    * so a failure here means the filter would see incomplete content.
+    */
+   if (fclose(fp) != 0)
+   {
+      log_error(LOG_LEVEL_ERROR, "fclose() failed for %s: %E", file_name);
+      unlink(file_name);
+      return NULL;
+   }
+
+   length = snprintf(cmd, sizeof(cmd), "%s < %s", external_filter, file_name);
+   if (length < 0 || (size_t)length >= sizeof(cmd))
+   {
+      log_error(LOG_LEVEL_ERROR,
+         "temporary-directory or external filter path too long");
+      unlink(file_name);
+      return NULL;
+   }
+
+   log_error(LOG_LEVEL_RE_FILTER, "Executing '%s': %s", name, cmd);
+
+   /*
+    * The locking is necessary to prevent other threads
+    * from overwriting the environment variables before
+    * the popen fork. Afterwards this no longer matters.
+    */
+   privoxy_mutex_lock(&external_filter_mutex);
+   set_privoxy_variables(csp);
+   fp = popen(cmd, "r");
+   privoxy_mutex_unlock(&external_filter_mutex);
+   if (fp == NULL)
+   {
+      log_error(LOG_LEVEL_ERROR, "popen(\"%s\", \"r\") failed: %E", cmd);
+      unlink(file_name);
+      return NULL;
+   }
+
+   /*
+    * Start with room for the original content, but never with less
+    * than a single read needs. This also avoids zero-sized requests.
+    */
+   capacity = (*size > READ_LENGTH) ? *size : READ_LENGTH;
+   filter_output = malloc_or_die(capacity);
+
+   new_size = 0;
+   while (!feof(fp) && !ferror(fp))
+   {
+      size_t len;
+
+      if (capacity - new_size < READ_LENGTH)
+      {
+         char *p;
+         /*
+          * As the capacity is at least READ_LENGTH, doubling it
+          * always makes room for the next read.
+          * Could be considered wasteful if the content is 'large'.
+          */
+         size_t new_capacity = capacity * 2;
+
+         p = realloc(filter_output, new_capacity);
+         if (p == NULL)
+         {
+            log_error(LOG_LEVEL_ERROR, "Out of memory while reading "
+               "external filter output. Using what we got so far.");
+            break;
+         }
+         filter_output = p;
+         capacity = new_capacity;
+      }
+      len = fread(&filter_output[new_size], 1, READ_LENGTH, fp);
+      if (len > 0)
+      {
+         new_size += len;
+      }
+   }
+
+   ret = pclose(fp);
+   if (ret == -1)
+   {
+      log_error(LOG_LEVEL_ERROR, "Executing %s failed: %E", cmd);
+   }
+   else
+   {
+      log_error(LOG_LEVEL_RE_FILTER,
+         "Executing '%s' resulted in return value %d. "
+         "Read %d of up to %d bytes.", name, (ret >> 8),
+         (int)new_size, (int)capacity);
+   }
+
+   unlink(file_name);
+   *size = new_size;
+
+   return filter_output;
+
+}
+#endif /* def FEATURE_EXTERNAL_FILTERS */
+
+
/*********************************************************************
*
* Function : gif_deanimate_response
* Function : get_filter_function
*
* Description : Decides which content filter function has
- * to be applied (if any).
+ * to be applied (if any). Only considers functions
+ * for internal filters which are mutually-exclusive.
*
* Parameters :
* 1 : csp = Current client state (buffers, headers, etc...)
{
filter_function = pcrs_filter_response;
}
- else if ((csp->content_type & CT_GIF) &&
+ else if ((csp->content_type & CT_GIF) &&
(csp->action->flags & ACTION_DEANIMATE))
{
filter_function = gif_deanimate_response;
* Function : remove_chunked_transfer_coding
*
* Description : In-situ remove the "chunked" transfer coding as defined
- * in rfc2616 from a buffer.
+ * in RFC 7230 4.1 from a buffer. XXX: The implementation
+ * is neither complete nor compliant (TODO #129).
*
* Parameters :
* 1 : buffer = Pointer to the text buffer
*********************************************************************/
char *execute_content_filters(struct client_state *csp)
{
+ char *content;
filter_function_ptr content_filter;
assert(content_filters_enabled(csp->action));
}
content_filter = get_filter_function(csp);
+ content = (content_filter != NULL) ? (*content_filter)(csp) : NULL;
+
+#ifdef FEATURE_EXTERNAL_FILTERS
+ if ((csp->content_type & CT_TEXT) &&
+ (csp->rlist != NULL) &&
+ !list_is_empty(csp->action->multi[ACTION_MULTI_EXTERNAL_FILTER]))
+ {
+ struct list_entry *filtername;
+ size_t size = (size_t)csp->content_length;
+
+ if (content == NULL)
+ {
+ content = csp->iob->cur;
+ size = (size_t)(csp->iob->eod - csp->iob->cur);
+ }
+
+ for (filtername = csp->action->multi[ACTION_MULTI_EXTERNAL_FILTER]->first;
+ filtername ; filtername = filtername->next)
+ {
+ char *result = execute_external_filter(csp, filtername->str, content, &size);
+ if (result != NULL)
+ {
+ if (content != csp->iob->cur)
+ {
+ free(content);
+ }
+ content = result;
+ }
+ }
+ csp->flags |= CSP_FLAG_MODIFIED;
+ csp->content_length = size;
+ }
+#endif /* def FEATURE_EXTERNAL_FILTERS */
+
+ return content;
- return ((*content_filter)(csp));
}
* Invalid syntax is fatal.
*
*********************************************************************/
-const static struct forward_spec *get_forward_override_settings(struct client_state *csp)
+static const struct forward_spec *get_forward_override_settings(struct client_state *csp)
{
const char *forward_override_line = csp->action->string[ACTION_STRING_FORWARD_OVERRIDE];
char forward_settings[BUFFER_SIZE];
*/
if ((csp->content_type & CT_TEXT) &&
(csp->rlist != NULL) &&
- (!list_is_empty(csp->action->multi[ACTION_MULTI_FILTER])))
+ (!list_is_empty(csp->action->multi[ACTION_MULTI_FILTER]) ||
+ !list_is_empty(csp->action->multi[ACTION_MULTI_EXTERNAL_FILTER])))
{
return TRUE;
}
int content_filters_enabled(const struct current_action_spec *action)
{
return ((action->flags & ACTION_DEANIMATE) ||
- !list_is_empty(action->multi[ACTION_MULTI_FILTER]));
+ !list_is_empty(action->multi[ACTION_MULTI_FILTER]) ||
+ !list_is_empty(action->multi[ACTION_MULTI_EXTERNAL_FILTER]));
}