-const char filters_rcs[] = "$Id: filters.c,v 1.148 2011/10/30 16:16:07 fabiankeil Exp $";
+const char filters_rcs[] = "$Id: filters.c,v 1.201 2016/03/17 10:40:53 fabiankeil Exp $";
/*********************************************************************
*
* File : $Source: /cvsroot/ijbswa/current/filters.c,v $
*
* Purpose : Declares functions to parse/crunch headers and pages.
- * Functions declared include:
- * `acl_addr', `add_stats', `block_acl', `block_imageurl',
- * `block_url', `url_actions', `domain_split',
- * `filter_popups', `forward_url', 'redirect_url',
- * `ij_untrusted_url', `intercept_url', `pcrs_filter_respose',
- * `ijb_send_banner', `trust_url', `gif_deanimate_response',
- * `execute_single_pcrs_command', `rewrite_url',
- * `get_last_url'
- *
- * Copyright : Written by and Copyright (C) 2001-2010 the
+ *
+ * Copyright : Written by and Copyright (C) 2001-2016 the
* Privoxy team. http://www.privoxy.org/
*
* Based on the Internet Junkbuster originally written
#include "miscutil.h"
#include "actions.h"
#include "cgi.h"
+#include "jcc.h"
#include "list.h"
#include "deanimate.h"
#include "urlmatch.h"
#include "loaders.h"
+#ifdef FEATURE_CLIENT_TAGS
+#include "client-tags.h"
+#endif
#ifdef _WIN32
#include "win32.h"
const char filters_h_rcs[] = FILTERS_H_VERSION;
-/* Fix a problem with Solaris. There should be no effect on other
- * platforms.
- * Solaris's isspace() is a macro which uses it's argument directly
- * as an array index. Therefore we need to make sure that high-bit
- * characters generate +ve values, and ideally we also want to make
- * the argument match the declared parameter type of "int".
- */
-#define ijb_isdigit(__X) isdigit((int)(unsigned char)(__X))
-
typedef char *(*filter_function_ptr)();
static filter_function_ptr get_filter_function(const struct client_state *csp);
static jb_err remove_chunked_transfer_coding(char *buffer, size_t *size);
static jb_err prepare_for_filtering(struct client_state *csp);
+static void apply_url_actions(struct current_action_spec *action,
+ struct http_request *http,
+#ifdef FEATURE_CLIENT_TAGS
+ const struct list *client_tags,
+#endif
+ struct url_actions *b);
#ifdef FEATURE_ACL
#ifdef HAVE_RFC2553
if (network->ss_family != netmask->ss_family)
{
/* This should never happen */
- log_error(LOG_LEVEL_ERROR,
- "Internal error at %s:%llu: network and netmask differ in family",
- __FILE__, __LINE__);
- return 0;
+ assert(network->ss_family == netmask->ss_family);
+ log_error(LOG_LEVEL_FATAL, "Network and netmask differ in family.");
}
sockaddr_storage_to_ip(network, &network_addr, &addr_len, &network_port);
netmask_addr += 12;
addr_len = 4;
}
- else if (network->ss_family != address->ss_family)
- {
- return 0;
- }
/* XXX: Port check is signaled in netmask */
if (*netmask_port && *network_port != *address_port)
* Use a temporary acl spec copy so we can log
* the unmodified original in case of parse errors.
*/
- acl_spec = strdup(aspec);
- if (acl_spec == NULL)
- {
- /* XXX: This will be logged as parse error. */
- return(-1);
- }
+ acl_spec = strdup_or_die(aspec);
if ((p = strchr(acl_spec, '/')) != NULL)
{
*p++ = '\0';
- if (ijb_isdigit(*p) == 0)
+ if (privoxy_isdigit(*p) == 0)
{
freez(acl_spec);
return(-1);
{
p = strchr(acl_spec, ':');
}
+ if (p != NULL)
+ {
+ assert(*p == ':');
+ *p = '\0';
+ p++;
+ }
#ifdef HAVE_RFC2553
memset(&hints, 0, sizeof(struct addrinfo));
hints.ai_family = AF_UNSPEC;
hints.ai_socktype = SOCK_STREAM;
- i = getaddrinfo(acl_spec, ((p) ? ++p : NULL), &hints, &result);
+ i = getaddrinfo(acl_spec, p, &hints, &result);
if (i != 0)
{
{
char *endptr;
- *p++ = '\0';
port = strtol(p, &endptr, 10);
if (port <= 0 || port > 65535 || *endptr != '\0')
/* determine HOW images should be blocked */
p = csp->action->string[ACTION_STRING_IMAGE_BLOCKER];
- if(csp->action->flags & ACTION_HANDLE_AS_EMPTY_DOCUMENT)
+ if (csp->action->flags & ACTION_HANDLE_AS_EMPTY_DOCUMENT)
{
log_error(LOG_LEVEL_ERROR, "handle-as-empty-document overruled by handle-as-image.");
}
/* and handle accordingly: */
if ((p == NULL) || (0 == strcmpic(p, "pattern")))
{
- rsp->status = strdup("403 Request blocked by Privoxy");
- if (rsp->status == NULL)
- {
- free_http_response(rsp);
- return cgi_error_memory();
- }
+ rsp->status = strdup_or_die("403 Request blocked by Privoxy");
rsp->body = bindup(image_pattern_data, image_pattern_length);
if (rsp->body == NULL)
{
}
else if (0 == strcmpic(p, "blank"))
{
- rsp->status = strdup("403 Request blocked by Privoxy");
- if (rsp->status == NULL)
- {
- free_http_response(rsp);
- return cgi_error_memory();
- }
+ rsp->status = strdup_or_die("403 Request blocked by Privoxy");
rsp->body = bindup(image_blank_data, image_blank_length);
if (rsp->body == NULL)
{
}
else
{
- rsp->status = strdup("302 Local Redirect from Privoxy");
- if (rsp->status == NULL)
- {
- free_http_response(rsp);
- return cgi_error_memory();
- }
+ rsp->status = strdup_or_die("302 Local Redirect from Privoxy");
if (enlist_unique_header(rsp->headers, "Location", p))
{
}
else
#endif /* def FEATURE_IMAGE_BLOCKING */
- if(csp->action->flags & ACTION_HANDLE_AS_EMPTY_DOCUMENT)
+ if (csp->action->flags & ACTION_HANDLE_AS_EMPTY_DOCUMENT)
{
/*
* Send empty document.
new_content_type = csp->action->string[ACTION_STRING_CONTENT_TYPE];
freez(rsp->body);
- rsp->body = strdup(" ");
+ rsp->body = strdup_or_die(" ");
rsp->content_length = 1;
if (csp->config->feature_flags & RUNTIME_FEATURE_EMPTY_DOC_RETURNS_OK)
* Return a 200 OK status for pages blocked with +handle-as-empty-document
* if the "handle-as-empty-doc-returns-ok" runtime config option is set.
*/
- rsp->status = strdup("200 Request blocked by Privoxy");
+ rsp->status = strdup_or_die("200 Request blocked by Privoxy");
}
else
{
- rsp->status = strdup("403 Request blocked by Privoxy");
+ rsp->status = strdup_or_die("403 Request blocked by Privoxy");
}
- if (rsp->status == NULL)
- {
- free_http_response(rsp);
- return cgi_error_memory();
- }
if (new_content_type != 0)
{
log_error(LOG_LEVEL_HEADER, "Overwriting Content-Type with %s", new_content_type);
jb_err err;
struct map * exports;
- rsp->status = strdup("403 Request blocked by Privoxy");
- if (rsp->status == NULL)
- {
- free_http_response(rsp);
- return cgi_error_memory();
- }
+ rsp->status = strdup_or_die("403 Request blocked by Privoxy");
exports = default_exports(csp, NULL);
if (exports == NULL)
struct map * exports;
char buf[BUFFER_SIZE];
char *p;
- struct url_spec **tl;
- struct url_spec *t;
+ struct pattern_spec **tl;
+ struct pattern_spec *t;
jb_err err;
/*
return cgi_error_memory();
}
- rsp->status = strdup("403 Request blocked by Privoxy");
+ rsp->status = strdup_or_die("403 Request blocked by Privoxy");
exports = default_exports(csp, NULL);
- if (exports == NULL || rsp->status == NULL)
+ if (exports == NULL)
{
free_http_response(rsp);
return cgi_error_memory();
/*
* Export the trust list
*/
- p = strdup("");
+ p = strdup_or_die("");
for (tl = csp->config->trust_list; (t = *tl) != NULL ; tl++)
{
snprintf(buf, sizeof(buf), "<li>%s</li>\n", t->spec);
{
struct list_entry *l;
- p = strdup("");
+ p = strdup_or_die("");
for (l = csp->config->trust_info->first; l ; l = l->next)
{
snprintf(buf, sizeof(buf), "<li> <a href=\"%s\">%s</a><br>\n", l->str, l->str);
{"path", csp->http->path, 1},
{"host", csp->http->host, 1},
{"origin", csp->ip_addr_str, 1},
+ {"listen-address", csp->listen_addr_str, 1},
{NULL, NULL, 1}
};
return NULL;
}
- if (0 == strcmpic(redirect_mode, "check-decoded-url"))
+ if (0 == strcmpic(redirect_mode, "check-decoded-url") && strchr(subject, '%'))
{
+ char *url_segment = NULL;
+ char **url_segments;
+ size_t max_segments;
+ int segments;
+
log_error(LOG_LEVEL_REDIRECTS,
"Checking \"%s\" for encoded redirects.", subject);
+
/*
* Check each parameter in the URL separately.
* Sectionize the URL at "?" and "&",
- * then URL-decode each component,
+ * go backwards through the segments, URL-decode them
* and look for a URL in the decoded result.
- * Keep the last one we spot.
+ * Stop the search after the first match.
+ *
+ * XXX: This estimate is guaranteed to be high enough as we
+ * let ssplit() ignore empty fields, but also a bit wasteful.
*/
- char *found = NULL;
- char *token = strtok(subject, "?&");
- while (token)
- {
- char *dtoken = url_decode(token);
- if (!dtoken) continue;
- char *h1 = strstr(dtoken, "http://");
- char *h2 = strstr(dtoken, "https://");
- char *h = (h1 && h2
- ? (h1 < h2 ? h1 : h2)
- : (h1 ? h1 : h2));
- if (h)
- {
- freez(found);
- found = strdup(h);
- if (found == NULL)
- {
- log_error(LOG_LEVEL_ERROR,
- "Out of memory while searching for redirects.");
- return NULL;
- }
- }
- token = strtok(NULL, "?&");
- }
+ max_segments = strlen(subject) / 2;
+ url_segments = malloc(max_segments * sizeof(char *));
- if (found)
+ if (NULL == url_segments)
{
+ log_error(LOG_LEVEL_ERROR,
+ "Out of memory while decoding URL: %s", subject);
freez(subject);
- return found;
+ return NULL;
}
- freez(subject);
- return NULL;
- }
+ segments = ssplit(subject, "?&", url_segments, max_segments);
- /* Else, just look for a URL inside this one, without decoding anything. */
+ while (segments-- > 0)
+ {
+ char *dtoken = url_decode(url_segments[segments]);
+ if (NULL == dtoken)
+ {
+ log_error(LOG_LEVEL_ERROR, "Unable to decode \"%s\".", url_segments[segments]);
+ continue;
+ }
+ url_segment = strstr(dtoken, "http://");
+ if (NULL == url_segment)
+ {
+ url_segment = strstr(dtoken, "https://");
+ }
+ if (NULL != url_segment)
+ {
+ url_segment = strdup_or_die(url_segment);
+ freez(dtoken);
+ break;
+ }
+ freez(dtoken);
+ }
+ freez(subject);
+ freez(url_segments);
- log_error(LOG_LEVEL_REDIRECTS,
- "Checking \"%s\" for unencoded redirects.", subject);
+ if (url_segment == NULL)
+ {
+ return NULL;
+ }
+ subject = url_segment;
+ }
+ else
+ {
+ /* Look for a URL inside this one, without decoding anything. */
+ log_error(LOG_LEVEL_REDIRECTS,
+ "Checking \"%s\" for unencoded redirects.", subject);
+ }
/*
* Find the last URL encoded in the request
* properly formatted URL and use it for the redirection
* directly.
*
- * According to RFC 2616 section 14.30 the URL
- * has to be absolute and if the user tries:
- * +redirect{shit/this/will/be/parsed/as/pcrs_command.html}
+ * According to (the now obsolete) RFC 2616 section 14.30
+ * the URL has to be absolute and if the user tries:
+ * +redirect{sadly/this/will/be/parsed/as/pcrs_command.html}
* she would get undefined results anyway.
*
+ * RFC 7231 7.1.2 actually allows relative references,
+ * but those start with a leading slash (RFC 3986 4.2) and
+ * thus can't be mistaken for pcrs commands either.
*/
if (*redirection_string == 's')
/* Did any redirect action trigger? */
if (new_url)
{
+ if (url_requires_percent_encoding(new_url))
+ {
+ char *encoded_url;
+ log_error(LOG_LEVEL_REDIRECTS, "Percent-encoding redirect URL: %N",
+ strlen(new_url), new_url);
+ encoded_url = percent_encode_url(new_url);
+ freez(new_url);
+ if (encoded_url == NULL)
+ {
+ return cgi_error_memory();
+ }
+ new_url = encoded_url;
+ assert(FALSE == url_requires_percent_encoding(new_url));
+ }
+
if (0 == strcmpic(new_url, csp->http->url))
{
log_error(LOG_LEVEL_ERROR,
return cgi_error_memory();
}
- if ( enlist_unique_header(rsp->headers, "Location", new_url)
- || (NULL == (rsp->status = strdup("302 Local Redirect from Privoxy"))) )
+ rsp->status = strdup_or_die("302 Local Redirect from Privoxy");
+ if (enlist_unique_header(rsp->headers, "Location", new_url))
{
freez(new_url);
free_http_response(rsp);
{
struct file_list *fl;
struct block_spec *b;
- struct url_spec **trusted_url;
+ struct pattern_spec **trusted_url;
struct http_request rhttp[1];
const char * referer;
jb_err err;
{
char * path;
char * path_end;
- char * new_entry = strdup("~");
+ char * new_entry = strdup_or_die("~");
string_append(&new_entry, csp->http->hostport);
#endif /* def FEATURE_TRUST */
+/*********************************************************************
+ *
+ * Function : get_filter
+ *
+ * Description : Get a filter with a given name and type.
+ * Note that taggers are filters, too.
+ *
+ * Parameters :
+ * 1 : csp = Current client state (buffers, headers, etc...)
+ * 2 : requested_name = Name of the content filter to get
+ * 3 : requested_type = Type of the filter to tagger to lookup
+ *
+ * Returns : A pointer to the requested filter
+ * or NULL if the filter wasn't found
+ *
+ *********************************************************************/
+struct re_filterfile_spec *get_filter(const struct client_state *csp,
+ const char *requested_name,
+ enum filter_type requested_type)
+{
+ int i;
+ struct re_filterfile_spec *b;
+ struct file_list *fl;
+
+ for (i = 0; i < MAX_AF_FILES; i++)
+ {
+ fl = csp->rlist[i];
+ if ((NULL == fl) || (NULL == fl->f))
+ {
+ /*
+ * Either there are no filter files left or this
+ * filter file just contains no valid filters.
+ *
+ * Continue to be sure we don't miss valid filter
+ * files that are chained after empty or invalid ones.
+ */
+ continue;
+ }
+
+ for (b = fl->f; b != NULL; b = b->next)
+ {
+ if (b->type != requested_type)
+ {
+ /* The callers isn't interested in this filter type. */
+ continue;
+ }
+ if (strcmp(b->name, requested_name) == 0)
+ {
+ /* The requested filter has been found. Abort search. */
+ return b;
+ }
+ }
+ }
+
+ /* No filter with the given name and type exists. */
+ return NULL;
+
+}
+
+
/*********************************************************************
*
* Function : pcrs_filter_response
static char *pcrs_filter_response(struct client_state *csp)
{
int hits = 0;
- int i;
size_t size, prev_size;
char *old = NULL;
char *new = NULL;
pcrs_job *job;
- struct file_list *fl;
struct re_filterfile_spec *b;
struct list_entry *filtername;
size = (size_t)(csp->iob->eod - csp->iob->cur);
old = csp->iob->cur;
- for (i = 0; i < MAX_AF_FILES; i++)
- {
- fl = csp->rlist[i];
- if ((NULL == fl) || (NULL == fl->f))
- {
- /*
- * Either there are no filter files
- * left, or this filter file just
- * contains no valid filters.
- *
- * Continue to be sure we don't miss
- * valid filter files that are chained
- * after empty or invalid ones.
- */
- continue;
- }
/*
* For all applying +filter actions, look if a filter by that
* name exists and if yes, execute it's pcrs_joblist on the
* buffer.
*/
- for (b = fl->f; b; b = b->next)
+ for (filtername = csp->action->multi[ACTION_MULTI_FILTER]->first;
+ filtername != NULL; filtername = filtername->next)
{
- if (b->type != FT_CONTENT_FILTER)
+ int current_hits = 0; /* Number of hits caused by this filter */
+ int job_number = 0; /* Which job we're currently executing */
+ int job_hits = 0; /* How many hits the current job caused */
+ pcrs_job *joblist;
+
+ b = get_filter(csp, filtername->str, FT_CONTENT_FILTER);
+ if (b == NULL)
{
- /* Skip header filters */
continue;
}
- for (filtername = csp->action->multi[ACTION_MULTI_FILTER]->first;
- filtername ; filtername = filtername->next)
- {
- if (strcmp(b->name, filtername->str) == 0)
- {
- int current_hits = 0; /* Number of hits caused by this filter */
- int job_number = 0; /* Which job we're currently executing */
- int job_hits = 0; /* How many hits the current job caused */
- pcrs_job *joblist = b->joblist;
+ joblist = b->joblist;
- if (b->dynamic) joblist = compile_dynamic_pcrs_job_list(csp, b);
+ if (b->dynamic) joblist = compile_dynamic_pcrs_job_list(csp, b);
- if (NULL == joblist)
- {
- log_error(LOG_LEVEL_RE_FILTER, "Filter %s has empty joblist. Nothing to do.", b->name);
- continue;
- }
+ if (NULL == joblist)
+ {
+ log_error(LOG_LEVEL_RE_FILTER, "Filter %s has empty joblist. Nothing to do.", b->name);
+ continue;
+ }
- prev_size = size;
- /* Apply all jobs from the joblist */
- for (job = joblist; NULL != job; job = job->next)
- {
- job_number++;
- job_hits = pcrs_execute(job, old, size, &new, &size);
+ prev_size = size;
+ /* Apply all jobs from the joblist */
+ for (job = joblist; NULL != job; job = job->next)
+ {
+ job_number++;
+ job_hits = pcrs_execute(job, old, size, &new, &size);
- if (job_hits >= 0)
- {
- /*
- * That went well. Continue filtering
- * and use the result of this job as
- * input for the next one.
- */
- current_hits += job_hits;
- if (old != csp->iob->cur)
- {
- freez(old);
- }
- old = new;
- }
- else
- {
- /*
- * This job caused an unexpected error. Inform the user
- * and skip the rest of the jobs in this filter. We could
- * continue with the next job, but usually the jobs
- * depend on each other or are similar enough to
- * fail for the same reason.
- *
- * At the moment our pcrs expects the error codes of pcre 3.4,
- * but newer pcre versions can return additional error codes.
- * As a result pcrs_strerror()'s error message might be
- * "Unknown error ...", therefore we print the numerical value
- * as well.
- *
- * XXX: Is this important enough for LOG_LEVEL_ERROR or
- * should we use LOG_LEVEL_RE_FILTER instead?
- */
- log_error(LOG_LEVEL_ERROR, "Skipped filter \'%s\' after job number %u: %s (%d)",
- b->name, job_number, pcrs_strerror(job_hits), job_hits);
- break;
- }
+ if (job_hits >= 0)
+ {
+ /*
+ * That went well. Continue filtering
+ * and use the result of this job as
+ * input for the next one.
+ */
+ current_hits += job_hits;
+ if (old != csp->iob->cur)
+ {
+ freez(old);
}
+ old = new;
+ }
+ else
+ {
+ /*
+ * This job caused an unexpected error. Inform the user
+ * and skip the rest of the jobs in this filter. We could
+ * continue with the next job, but usually the jobs
+ * depend on each other or are similar enough to
+ * fail for the same reason.
+ *
+ * At the moment our pcrs expects the error codes of pcre 3.4,
+ * but newer pcre versions can return additional error codes.
+ * As a result pcrs_strerror()'s error message might be
+ * "Unknown error ...", therefore we print the numerical value
+ * as well.
+ *
+ * XXX: Is this important enough for LOG_LEVEL_ERROR or
+ * should we use LOG_LEVEL_RE_FILTER instead?
+ */
+ log_error(LOG_LEVEL_ERROR, "Skipped filter \'%s\' after job number %u: %s (%d)",
+ b->name, job_number, pcrs_strerror(job_hits), job_hits);
+ break;
+ }
+ }
- if (b->dynamic) pcrs_free_joblist(joblist);
+ if (b->dynamic) pcrs_free_joblist(joblist);
- log_error(LOG_LEVEL_RE_FILTER,
- "filtering %s%s (size %d) with \'%s\' produced %d hits (new size %d).",
- csp->http->hostport, csp->http->path, prev_size, b->name, current_hits, size);
+ log_error(LOG_LEVEL_RE_FILTER,
+ "filtering %s%s (size %d) with \'%s\' produced %d hits (new size %d).",
+ csp->http->hostport, csp->http->path, prev_size, b->name, current_hits, size);
- hits += current_hits;
- }
- }
- }
+ hits += current_hits;
}
/*
csp->flags |= CSP_FLAG_MODIFIED;
csp->content_length = size;
- IOB_RESET(csp);
+ clear_iob(csp->iob);
return(new);
}
+#ifdef FEATURE_EXTERNAL_FILTERS
+/*********************************************************************
+ *
+ * Function : get_external_filter
+ *
+ * Description : Lookup the code to execute for an external filter.
+ * Masks the misuse of the re_filterfile_spec.
+ *
+ * Parameters :
+ * 1 : csp = Current client state (buffers, headers, etc...)
+ * 2 : name = Name of the content filter to get
+ *
+ * Returns : A pointer to the requested code
+ * or NULL if the filter wasn't found
+ *
+ *********************************************************************/
+static const char *get_external_filter(const struct client_state *csp,
+ const char *name)
+{
+ struct re_filterfile_spec *external_filter;
+
+ external_filter = get_filter(csp, name, FT_EXTERNAL_CONTENT_FILTER);
+ if (external_filter == NULL)
+ {
+ log_error(LOG_LEVEL_FATAL,
+ "Didn't find stuff to execute for external filter: %s",
+ name);
+ }
+
+ return external_filter->patterns->first->str;
+
+}
+
+
+/*********************************************************************
+ *
+ * Function : set_privoxy_variables
+ *
+ * Description : Sets a couple of privoxy-specific environment variables
+ *
+ * Parameters :
+ * 1 : csp = Current client state (buffers, headers, etc...)
+ *
+ * Returns : N/A
+ *
+ *********************************************************************/
+static void set_privoxy_variables(const struct client_state *csp)
+{
+ int i;
+ struct {
+ const char *name;
+ const char *value;
+ } env[] = {
+ { "PRIVOXY_URL", csp->http->url },
+ { "PRIVOXY_PATH", csp->http->path },
+ { "PRIVOXY_HOST", csp->http->host },
+ { "PRIVOXY_ORIGIN", csp->ip_addr_str },
+ { "PRIVOXY_LISTEN_ADDRESS", csp->listen_addr_str },
+ };
+
+ for (i = 0; i < SZ(env); i++)
+ {
+ if (setenv(env[i].name, env[i].value, 1))
+ {
+ log_error(LOG_LEVEL_ERROR, "Failed to set %s=%s: %E",
+ env[i].name, env[i].value);
+ }
+ }
+}
+
+
+/*********************************************************************
+ *
+ * Function : execute_external_filter
+ *
+ * Description : Pipe content into external filter and return the output
+ *
+ * Parameters :
+ * 1 : csp = Current client state (buffers, headers, etc...)
+ * 2 : name = Name of the external filter to execute
+ * 3 : content = The original content to filter
+ * 4 : size = The size of the content buffer
+ *
+ * Returns : a pointer to the (newly allocated) modified buffer.
+ * or NULL if there were no hits or something went wrong
+ *
+ *********************************************************************/
+static char *execute_external_filter(const struct client_state *csp,
+ const char *name, char *content, size_t *size)
+{
+ char cmd[200];
+ char file_name[FILENAME_MAX];
+ FILE *fp;
+ char *filter_output;
+ int fd;
+ int ret;
+ size_t new_size;
+ const char *external_filter;
+
+ if (csp->config->temporary_directory == NULL)
+ {
+ log_error(LOG_LEVEL_ERROR,
+ "No temporary-directory configured. Can't execute filter: %s",
+ name);
+ return NULL;
+ }
+
+ external_filter = get_external_filter(csp, name);
+
+ if (sizeof(file_name) < snprintf(file_name, sizeof(file_name),
+ "%s/privoxy-XXXXXXXX", csp->config->temporary_directory))
+ {
+ log_error(LOG_LEVEL_ERROR, "temporary-directory path too long");
+ return NULL;
+ }
+
+ fd = mkstemp(file_name);
+ if (fd == -1)
+ {
+ log_error(LOG_LEVEL_ERROR, "mkstemp() failed to create %s: %E", file_name);
+ return NULL;
+ }
+
+ fp = fdopen(fd, "w");
+ if (fp == NULL)
+ {
+ log_error(LOG_LEVEL_ERROR, "fdopen() failed: %E");
+ unlink(file_name);
+ return NULL;
+ }
+
+ /*
+ * The size may be zero if a previous filter discarded everything.
+ *
+ * This isn't necessary unintentional, so we just don't try
+ * to fwrite() nothing and let the user deal with the rest.
+ */
+ if ((*size != 0) && fwrite(content, *size, 1, fp) != 1)
+ {
+ log_error(LOG_LEVEL_ERROR, "fwrite(..., %d, 1, ..) failed: %E", *size);
+ unlink(file_name);
+ fclose(fp);
+ return NULL;
+ }
+ fclose(fp);
+
+ if (sizeof(cmd) < snprintf(cmd, sizeof(cmd), "%s < %s", external_filter, file_name))
+ {
+ log_error(LOG_LEVEL_ERROR,
+ "temporary-directory or external filter path too long");
+ unlink(file_name);
+ return NULL;
+ }
+
+ log_error(LOG_LEVEL_RE_FILTER, "Executing '%s': %s", name, cmd);
+
+ /*
+ * The locking is necessary to prevent other threads
+ * from overwriting the environment variables before
+ * the popen fork. Afterwards this no longer matters.
+ */
+ privoxy_mutex_lock(&external_filter_mutex);
+ set_privoxy_variables(csp);
+ fp = popen(cmd, "r");
+ privoxy_mutex_unlock(&external_filter_mutex);
+ if (fp == NULL)
+ {
+ log_error(LOG_LEVEL_ERROR, "popen(\"%s\", \"r\") failed: %E", cmd);
+ unlink(file_name);
+ return NULL;
+ }
+
+ /* Allocate at least one byte */
+ filter_output = malloc_or_die(*size + 1);
+
+ new_size = 0;
+ while (!feof(fp) && !ferror(fp))
+ {
+ size_t len;
+ /* Could be bigger ... */
+ enum { READ_LENGTH = 2048 };
+
+ if (new_size + READ_LENGTH >= *size)
+ {
+ char *p;
+
+ /* Could be considered wasteful if the content is 'large'. */
+ *size += (*size >= READ_LENGTH) ? *size : READ_LENGTH;
+
+ p = realloc(filter_output, *size);
+ if (p == NULL)
+ {
+ log_error(LOG_LEVEL_ERROR, "Out of memory while reading "
+ "external filter output. Using what we got so far.");
+ break;
+ }
+ filter_output = p;
+ }
+ assert(new_size + READ_LENGTH < *size);
+ len = fread(&filter_output[new_size], 1, READ_LENGTH, fp);
+ if (len > 0)
+ {
+ new_size += len;
+ }
+ }
+
+ ret = pclose(fp);
+ if (ret == -1)
+ {
+ log_error(LOG_LEVEL_ERROR, "Executing %s failed: %E", cmd);
+ }
+ else
+ {
+ log_error(LOG_LEVEL_RE_FILTER,
+ "Executing '%s' resulted in return value %d. "
+ "Read %d of up to %d bytes.", name, (ret >> 8), new_size, *size);
+ }
+
+ unlink(file_name);
+ *size = new_size;
+
+ return filter_output;
+
+}
+#endif /* def FEATURE_EXTERNAL_FILTERS */
+
+
/*********************************************************************
*
* Function : gif_deanimate_response
size = (size_t)(csp->iob->eod - csp->iob->cur);
- if ( (NULL == (in = (struct binbuffer *)zalloc(sizeof *in )))
- || (NULL == (out = (struct binbuffer *)zalloc(sizeof *out))) )
- {
- log_error(LOG_LEVEL_DEANIMATE, "failed! (no mem)");
- return NULL;
- }
+ in = zalloc_or_die(sizeof(*in));
+ out = zalloc_or_die(sizeof(*out));
in->buffer = csp->iob->cur;
in->size = size;
* Function : get_filter_function
*
* Description : Decides which content filter function has
- * to be applied (if any).
+ * to be applied (if any). Only considers functions
+ * for internal filters which are mutually-exclusive.
*
* Parameters :
* 1 : csp = Current client state (buffers, headers, etc...)
* the content type and action settings.
*/
if ((csp->content_type & CT_TEXT) &&
- (csp->rlist != NULL) &&
(!list_is_empty(csp->action->multi[ACTION_MULTI_FILTER])))
{
filter_function = pcrs_filter_response;
}
- else if ((csp->content_type & CT_GIF) &&
+ else if ((csp->content_type & CT_GIF) &&
(csp->action->flags & ACTION_DEANIMATE))
{
filter_function = gif_deanimate_response;
* Function : remove_chunked_transfer_coding
*
* Description : In-situ remove the "chunked" transfer coding as defined
- * in rfc2616 from a buffer.
+ * in RFC 7230 4.1 from a buffer. XXX: The implementation
+ * is neither complete nor compliant (TODO #129).
*
* Parameters :
* 1 : buffer = Pointer to the text buffer
size_t newsize = 0;
unsigned int chunksize = 0;
char *from_p, *to_p;
+ const char *end_of_buffer = buffer + *size;
assert(buffer);
from_p = to_p = buffer;
while (chunksize > 0U)
{
+ /*
+ * If the chunk-size is valid, we should have at least
+ * chunk-size bytes of chunk-data and five bytes of
+ * meta data (chunk-size, CRLF, CRLF) left in the buffer.
+ */
+ if (chunksize + 5 >= *size - newsize)
+ {
+ log_error(LOG_LEVEL_ERROR,
+ "Chunk size %u exceeds buffered data left. "
+ "Already digested %u of %u buffered bytes.",
+ chunksize, (unsigned int)newsize, (unsigned int)*size);
+ return JB_ERR_PARSE;
+ }
+
+ /*
+ * Skip the chunk-size, the optional chunk-ext and the CRLF
+ * that is supposed to be located directly before the start
+ * of chunk-data.
+ */
if (NULL == (from_p = strstr(from_p, "\r\n")))
{
log_error(LOG_LEVEL_ERROR, "Parse error while stripping \"chunked\" transfer coding");
return JB_ERR_PARSE;
}
+ from_p += 2;
- if ((newsize += chunksize) >= *size)
+ /*
+ * The previous strstr() does not enforce chunk-validity
+ * and is sattisfied as long a CRLF is left in the buffer.
+ *
+ * Make sure the bytes we consider chunk-data are within
+ * the valid range.
+ */
+ if (from_p + chunksize >= end_of_buffer)
{
- /*
- * XXX: The message is a bit confusing. Isn't the real problem that
- * the specified chunk size is greater than the number of bytes
- * left in the buffer? This probably means the connection got
- * closed prematurely. To be investigated after 3.0.17 is out.
- */
log_error(LOG_LEVEL_ERROR,
- "Chunk size %d exceeds buffer size %d in \"chunked\" transfer coding",
- chunksize, *size);
+ "End of chunk is beyond the end of the buffer.");
return JB_ERR_PARSE;
}
- from_p += 2;
memmove(to_p, from_p, (size_t) chunksize);
+ newsize += chunksize;
to_p = buffer + newsize;
- from_p += chunksize + 2;
+ from_p += chunksize;
+ /*
+ * Not merging this check with the previous one allows us
+ * to keep chunks without trailing CRLF. It's not clear
+ * if we actually have to care about those, though.
+ */
+ if (from_p + 2 >= end_of_buffer)
+ {
+ log_error(LOG_LEVEL_ERROR, "Not enough room for trailing CRLF.");
+ return JB_ERR_PARSE;
+ }
+ from_p += 2;
if (sscanf(from_p, "%x", &chunksize) != 1)
{
log_error(LOG_LEVEL_INFO, "Invalid \"chunked\" transfer encoding detected and ignored.");
*********************************************************************/
char *execute_content_filters(struct client_state *csp)
{
+ char *content;
filter_function_ptr content_filter;
assert(content_filters_enabled(csp->action));
}
content_filter = get_filter_function(csp);
+ content = (content_filter != NULL) ? (*content_filter)(csp) : NULL;
+
+#ifdef FEATURE_EXTERNAL_FILTERS
+ if ((csp->content_type & CT_TEXT) &&
+ !list_is_empty(csp->action->multi[ACTION_MULTI_EXTERNAL_FILTER]))
+ {
+ struct list_entry *filtername;
+ size_t size = (size_t)csp->content_length;
+
+ if (content == NULL)
+ {
+ content = csp->iob->cur;
+ size = (size_t)(csp->iob->eod - csp->iob->cur);
+ }
+
+ for (filtername = csp->action->multi[ACTION_MULTI_EXTERNAL_FILTER]->first;
+ filtername ; filtername = filtername->next)
+ {
+ char *result = execute_external_filter(csp, filtername->str, content, &size);
+ if (result != NULL)
+ {
+ if (content != csp->iob->cur)
+ {
+ free(content);
+ }
+ content = result;
+ }
+ }
+ csp->flags |= CSP_FLAG_MODIFIED;
+ csp->content_length = size;
+ }
+#endif /* def FEATURE_EXTERNAL_FILTERS */
+
+ return content;
- return ((*content_filter)(csp));
}
return;
}
+#ifdef FEATURE_CLIENT_TAGS
+ apply_url_actions(csp->action, http, csp->client_tags, b);
+#else
apply_url_actions(csp->action, http, b);
+#endif
}
return;
}
-
/*********************************************************************
*
* Function : apply_url_actions
* Parameters :
* 1 : action = Destination.
* 2 : http = Current URL
- * 3 : b = list of URL actions to apply
+ * 3 : client_tags = list of client tags
+ * 4 : b = list of URL actions to apply
*
* Returns : N/A
*
*********************************************************************/
-void apply_url_actions(struct current_action_spec *action,
- struct http_request *http,
- struct url_actions *b)
+static void apply_url_actions(struct current_action_spec *action,
+ struct http_request *http,
+#ifdef FEATURE_CLIENT_TAGS
+ const struct list *client_tags,
+#endif
+ struct url_actions *b)
{
if (b == NULL)
{
{
merge_current_action(action, b->action);
}
+#ifdef FEATURE_CLIENT_TAGS
+ if (client_tag_match(b->url, client_tags))
+ {
+ merge_current_action(action, b->action);
+ }
+#endif
}
}
* Invalid syntax is fatal.
*
*********************************************************************/
-const static struct forward_spec *get_forward_override_settings(struct client_state *csp)
+static const struct forward_spec *get_forward_override_settings(struct client_state *csp)
{
const char *forward_override_line = csp->action->string[ACTION_STRING_FORWARD_OVERRIDE];
char forward_settings[BUFFER_SIZE];
* the lifetime of this request. Save its location
* in csp as well, so sweep() can free it later on.
*/
- fwd = csp->fwd = zalloc(sizeof(*fwd));
- if (NULL == fwd)
- {
- log_error(LOG_LEVEL_FATAL,
- "can't allocate memory for forward-override{%s}", forward_override_line);
- /* Never get here - LOG_LEVEL_FATAL causes program exit */
- return NULL;
- }
+ fwd = csp->fwd = zalloc_or_die(sizeof(*fwd));
- vec_count = ssplit(forward_settings, " \t", vec, SZ(vec), 1, 1);
+ vec_count = ssplit(forward_settings, " \t", vec, SZ(vec));
if ((vec_count == 2) && !strcasecmp(vec[0], "forward"))
{
fwd->type = SOCKS_NONE;
/* Parse the parent HTTP proxy host:port */
http_parent = vec[1];
+ }
+ else if ((vec_count == 2) && !strcasecmp(vec[0], "forward-webserver"))
+ {
+ fwd->type = FORWARD_WEBSERVER;
+
+ /* Parse the parent HTTP server host:port */
+ http_parent = vec[1];
+
}
else if (vec_count == 3)
{
fwd->type = SOCKS_5;
socks_proxy = vec[1];
}
+ else if (!strcasecmp(vec[0], "forward-socks5t"))
+ {
+ fwd->type = SOCKS_5T;
+ socks_proxy = vec[1];
+ }
if (NULL != socks_proxy)
{
const struct forward_spec *forward_url(struct client_state *csp,
const struct http_request *http)
{
- static const struct forward_spec fwd_default[1] = { FORWARD_SPEC_INITIALIZER };
+ static const struct forward_spec fwd_default[1]; /* Zero'ed due to being static. */
struct forward_spec *fwd = csp->config->forward;
if (csp->action->flags & ACTION_FORWARD_OVERRIDE)
return cgi_error_memory();
}
- if (NULL == (rsp->status = strdup("501 Not Implemented")))
- {
- free_http_response(rsp);
- return cgi_error_memory();
- }
-
+ rsp->status = strdup_or_die("501 Not Implemented");
rsp->is_static = 1;
rsp->crunch_reason = UNSUPPORTED;
* the content type and action settings.
*/
if ((csp->content_type & CT_TEXT) &&
- (csp->rlist != NULL) &&
- (!list_is_empty(csp->action->multi[ACTION_MULTI_FILTER])))
+ (!list_is_empty(csp->action->multi[ACTION_MULTI_FILTER]) ||
+ !list_is_empty(csp->action->multi[ACTION_MULTI_EXTERNAL_FILTER])))
{
return TRUE;
}
int content_filters_enabled(const struct current_action_spec *action)
{
return ((action->flags & ACTION_DEANIMATE) ||
- !list_is_empty(action->multi[ACTION_MULTI_FILTER]));
+ !list_is_empty(action->multi[ACTION_MULTI_FILTER]) ||
+ !list_is_empty(action->multi[ACTION_MULTI_EXTERNAL_FILTER]));
}