-const char filters_rcs[] = "$Id: filters.c,v 1.141 2011/03/08 18:32:11 fabiankeil Exp $";
+const char filters_rcs[] = "$Id: filters.c,v 1.179 2013/12/24 13:32:51 fabiankeil Exp $";
/*********************************************************************
*
* File : $Source: /cvsroot/ijbswa/current/filters.c,v $
*
* Purpose : Declares functions to parse/crunch headers and pages.
- * Functions declared include:
- * `acl_addr', `add_stats', `block_acl', `block_imageurl',
- * `block_url', `url_actions', `domain_split',
- * `filter_popups', `forward_url', 'redirect_url',
- * `ij_untrusted_url', `intercept_url', `pcrs_filter_respose',
- * `ijb_send_banner', `trust_url', `gif_deanimate_response',
- * `execute_single_pcrs_command', `rewrite_url',
- * `get_last_url'
- *
- * Copyright : Written by and Copyright (C) 2001-2010 the
+ *
+ * Copyright : Written by and Copyright (C) 2001-2011 the
* Privoxy team. http://www.privoxy.org/
*
* Based on the Internet Junkbuster originally written
const char filters_h_rcs[] = FILTERS_H_VERSION;
-/* Fix a problem with Solaris. There should be no effect on other
- * platforms.
- * Solaris's isspace() is a macro which uses it's argument directly
- * as an array index. Therefore we need to make sure that high-bit
- * characters generate +ve values, and ideally we also want to make
- * the argument match the declared parameter type of "int".
- */
-#define ijb_isdigit(__X) isdigit((int)(unsigned char)(__X))
-
typedef char *(*filter_function_ptr)();
static filter_function_ptr get_filter_function(const struct client_state *csp);
static jb_err remove_chunked_transfer_coding(char *buffer, size_t *size);
if (network->ss_family != netmask->ss_family)
{
/* This should never happen */
- log_error(LOG_LEVEL_ERROR,
- "Internal error at %s:%llu: network and netmask differ in family",
- __FILE__, __LINE__);
- return 0;
+ assert(network->ss_family == netmask->ss_family);
+ log_error(LOG_LEVEL_FATAL, "Network and netmask differ in family.");
}
sockaddr_storage_to_ip(network, &network_addr, &addr_len, &network_port);
netmask_addr += 12;
addr_len = 4;
}
- else if (network->ss_family != address->ss_family)
- {
- return 0;
- }
/* XXX: Port check is signaled in netmask */
if (*netmask_port && *network_port != *address_port)
if ((p = strchr(acl_spec, '/')) != NULL)
{
*p++ = '\0';
- if (ijb_isdigit(*p) == 0)
+ if (privoxy_isdigit(*p) == 0)
{
freez(acl_spec);
return(-1);
}
if (csp->action->flags & ACTION_REDIRECT)
{
- log_error(LOG_LEVEL_ERROR, "redirect{} overruled by block.");
+ log_error(LOG_LEVEL_ERROR, "redirect{} overruled by block.");
}
/*
* Else, prepare a response
/* determine HOW images should be blocked */
p = csp->action->string[ACTION_STRING_IMAGE_BLOCKER];
- if(csp->action->flags & ACTION_HANDLE_AS_EMPTY_DOCUMENT)
+ if (csp->action->flags & ACTION_HANDLE_AS_EMPTY_DOCUMENT)
{
log_error(LOG_LEVEL_ERROR, "handle-as-empty-document overruled by handle-as-image.");
}
}
else
#endif /* def FEATURE_IMAGE_BLOCKING */
- if(csp->action->flags & ACTION_HANDLE_AS_EMPTY_DOCUMENT)
+ if (csp->action->flags & ACTION_HANDLE_AS_EMPTY_DOCUMENT)
{
/*
- * Send empty document.
+ * Send empty document.
*/
new_content_type = csp->action->string[ACTION_STRING_CONTENT_TYPE];
{
jb_err err;
struct map * exports;
- char *p;
-
- /*
- * Workaround for stupid Netscape bug which prevents
- * pages from being displayed if loading a referenced
- * JavaScript or style sheet fails. So make it appear
- * as if it succeeded.
- */
- if ( NULL != (p = get_header_value(csp->headers, "User-Agent:"))
- && !strncmpic(p, "mozilla", 7) /* Catch Netscape but */
- && !strstr(p, "Gecko") /* save Mozilla, */
- && !strstr(p, "compatible") /* MSIE */
- && !strstr(p, "Opera")) /* and Opera. */
- {
- rsp->status = strdup("200 Request blocked by Privoxy");
- }
- else
- {
- rsp->status = strdup("403 Request blocked by Privoxy");
- }
+ rsp->status = strdup("403 Request blocked by Privoxy");
if (rsp->status == NULL)
{
free_http_response(rsp);
struct map * exports;
char buf[BUFFER_SIZE];
char *p;
- struct url_spec **tl;
- struct url_spec *t;
+ struct pattern_spec **tl;
+ struct pattern_spec *t;
jb_err err;
/*
* Export the protocol, host, port, and referrer information
*/
err = map(exports, "hostport", 1, csp->http->hostport, 1);
- if (!err) err = map(exports, "protocol", 1, csp->http->ssl ? "https://" : "http://", 1);
+ if (!err) err = map(exports, "protocol", 1, csp->http->ssl ? "https://" : "http://", 1);
if (!err) err = map(exports, "path", 1, csp->http->path, 1);
if (NULL != (p = get_header_value(csp->headers, "Referer:")))
* 2 : b = The filter list to compile
*
* Returns : NULL in case of errors, otherwise the
- * pcrs job list.
+ * pcrs job list.
*
*********************************************************************/
pcrs_job *compile_dynamic_pcrs_job_list(const struct client_state *csp, const struct re_filterfile_spec *b)
* 2 : pcrs_command = pcrs command formatted as string (s@foo@bar@)
*
*
- * Returns : NULL if the pcrs_command didn't change the url, or
+ * Returns : NULL if the pcrs_command didn't change the url, or
* the result of the modification.
*
*********************************************************************/
*
* Parameters :
* 1 : subject = the string to check
- * 2 : redirect_mode = +fast-redirect{} mode
+ * 2 : redirect_mode = +fast-redirect{} mode
*
* Returns : NULL if no URL was found, or
* the last URL found.
return NULL;
}
- if (0 == strcmpic(redirect_mode, "check-decoded-url"))
+ if (0 == strcmpic(redirect_mode, "check-decoded-url") && strchr(subject, '%'))
{
- log_error(LOG_LEVEL_REDIRECTS, "Decoding \"%s\" if necessary.", subject);
- new_url = url_decode(subject);
- if (new_url != NULL)
+ char *url_segment = NULL;
+ char **url_segments;
+ size_t max_segments;
+ int segments;
+
+ log_error(LOG_LEVEL_REDIRECTS,
+ "Checking \"%s\" for encoded redirects.", subject);
+
+ /*
+ * Check each parameter in the URL separately.
+ * Sectionize the URL at "?" and "&",
+ * go backwards through the segments, URL-decode them
+ * and look for a URL in the decoded result.
+ * Stop the search after the first match.
+ *
+ * XXX: This estimate is guaranteed to be high enough as we
+ * let ssplit() ignore empty fields, but also a bit wasteful.
+ */
+ max_segments = strlen(subject) / 2;
+ url_segments = malloc(max_segments * sizeof(char *));
+
+ if (NULL == url_segments)
{
+ log_error(LOG_LEVEL_ERROR,
+ "Out of memory while decoding URL: %s", subject);
freez(subject);
- subject = new_url;
+ return NULL;
}
- else
+
+ segments = ssplit(subject, "?&", url_segments, max_segments);
+
+ while (segments-- > 0)
{
- log_error(LOG_LEVEL_ERROR, "Unable to decode \"%s\".", subject);
+ char *dtoken = url_decode(url_segments[segments]);
+ if (NULL == dtoken)
+ {
+ log_error(LOG_LEVEL_ERROR, "Unable to decode \"%s\".", url_segments[segments]);
+ continue;
+ }
+ url_segment = strstr(dtoken, "http://");
+ if (NULL == url_segment)
+ {
+ url_segment = strstr(dtoken, "https://");
+ }
+ if (NULL != url_segment)
+ {
+ url_segment = strdup(url_segment);
+ freez(dtoken);
+ if (url_segment == NULL)
+ {
+ log_error(LOG_LEVEL_ERROR,
+ "Out of memory while searching for redirects.");
+ return NULL;
+ }
+ break;
+ }
+ freez(dtoken);
}
- }
+ freez(subject);
+ freez(url_segments);
- log_error(LOG_LEVEL_REDIRECTS, "Checking \"%s\" for redirects.", subject);
+ if (url_segment == NULL)
+ {
+ return NULL;
+ }
+ subject = url_segment;
+ }
+ else
+ {
+ /* Look for a URL inside this one, without decoding anything. */
+ log_error(LOG_LEVEL_REDIRECTS,
+ "Checking \"%s\" for unencoded redirects.", subject);
+ }
/*
* Find the last URL encoded in the request
))
{
/*
- * Return new URL if we found a redirect
+ * Return new URL if we found a redirect
* or if the subject already was a URL.
*
* The second case makes sure that we can
#endif /* def FEATURE_FAST_REDIRECTS */
csp->action->flags &= ~ACTION_REDIRECT;
- /* Did any redirect action trigger? */
+ /* Did any redirect action trigger? */
if (new_url)
{
+ if (url_requires_percent_encoding(new_url))
+ {
+ char *encoded_url;
+ log_error(LOG_LEVEL_REDIRECTS, "Percent-encoding redirect URL: %N",
+ strlen(new_url), new_url);
+ encoded_url = percent_encode_url(new_url);
+ freez(new_url);
+ if (encoded_url == NULL)
+ {
+ return cgi_error_memory();
+ }
+ new_url = encoded_url;
+ assert(FALSE == url_requires_percent_encoding(new_url));
+ }
+
if (0 == strcmpic(new_url, csp->http->url))
{
log_error(LOG_LEVEL_ERROR,
return cgi_error_memory();
}
- if ( enlist_unique_header(rsp->headers, "Location", new_url)
- || (NULL == (rsp->status = strdup("302 Local Redirect from Privoxy"))) )
+ if (enlist_unique_header(rsp->headers, "Location", new_url)
+ || (NULL == (rsp->status = strdup("302 Local Redirect from Privoxy"))))
{
freez(new_url);
free_http_response(rsp);
{
struct file_list *fl;
struct block_spec *b;
- struct url_spec **trusted_url;
+ struct pattern_spec **trusted_url;
struct http_request rhttp[1];
const char * referer;
jb_err err;
#endif /* def FEATURE_TRUST */
+/*********************************************************************
+ *
+ * Function : get_filter
+ *
+ * Description : Get a filter with a given name and type.
+ * Note that taggers are filters, too.
+ *
+ * Walks the filter lists of all csp->rlist[] entries
+ * in index order and returns the first filter whose
+ * type equals requested_type and whose name matches
+ * requested_name exactly (strcmp(), case-sensitive).
+ *
+ * Parameters :
+ * 1 : csp = Current client state (buffers, headers, etc...)
+ * 2 : requested_name = Name of the filter or tagger to get
+ * 3 : requested_type = Type of the filter or tagger to look up
+ *
+ * Returns : A pointer to the requested filter
+ * or NULL if the filter wasn't found.
+ * The pointer refers to the entry in the filter
+ * list itself, not to a copy.
+ *
+ *********************************************************************/
+struct re_filterfile_spec *get_filter(const struct client_state *csp,
+ const char *requested_name,
+ enum filter_type requested_type)
+{
+ int i;
+ struct re_filterfile_spec *b;
+ struct file_list *fl;
+
+ for (i = 0; i < MAX_AF_FILES; i++)
+ {
+ fl = csp->rlist[i];
+ if ((NULL == fl) || (NULL == fl->f))
+ {
+ /*
+ * Either there are no filter files left or this
+ * filter file just contains no valid filters.
+ *
+ * Continue to be sure we don't miss valid filter
+ * files that are chained after empty or invalid ones.
+ */
+ continue;
+ }
+
+ for (b = fl->f; b != NULL; b = b->next)
+ {
+ if (b->type != requested_type)
+ {
+ /* The caller isn't interested in this filter type. */
+ continue;
+ }
+ if (strcmp(b->name, requested_name) == 0)
+ {
+ /* The requested filter has been found. Abort search. */
+ return b;
+ }
+ }
+ }
+
+ /* No filter with the given name and type exists. */
+ return NULL;
+
+}
+
+
/*********************************************************************
*
* Function : pcrs_filter_response
static char *pcrs_filter_response(struct client_state *csp)
{
int hits = 0;
- int i;
size_t size, prev_size;
char *old = NULL;
char *new = NULL;
pcrs_job *job;
- struct file_list *fl;
struct re_filterfile_spec *b;
struct list_entry *filtername;
- /*
+ /*
* Sanity first
*/
if (csp->iob->cur >= csp->iob->eod)
size = (size_t)(csp->iob->eod - csp->iob->cur);
old = csp->iob->cur;
- for (i = 0; i < MAX_AF_FILES; i++)
- {
- fl = csp->rlist[i];
- if ((NULL == fl) || (NULL == fl->f))
- {
- /*
- * Either there are no filter files
- * left, or this filter file just
- * contains no valid filters.
- *
- * Continue to be sure we don't miss
- * valid filter files that are chained
- * after empty or invalid ones.
- */
- continue;
- }
/*
* For all applying +filter actions, look if a filter by that
* name exists and if yes, execute its pcrs_joblist on the
* buffer.
*/
- for (b = fl->f; b; b = b->next)
+ for (filtername = csp->action->multi[ACTION_MULTI_FILTER]->first;
+ filtername != NULL; filtername = filtername->next)
{
- if (b->type != FT_CONTENT_FILTER)
+ int current_hits = 0; /* Number of hits caused by this filter */
+ int job_number = 0; /* Which job we're currently executing */
+ int job_hits = 0; /* How many hits the current job caused */
+ pcrs_job *joblist;
+
+ b = get_filter(csp, filtername->str, FT_CONTENT_FILTER);
+ if (b == NULL)
{
- /* Skip header filters */
continue;
}
- for (filtername = csp->action->multi[ACTION_MULTI_FILTER]->first;
- filtername ; filtername = filtername->next)
- {
- if (strcmp(b->name, filtername->str) == 0)
- {
- int current_hits = 0; /* Number of hits caused by this filter */
- int job_number = 0; /* Which job we're currently executing */
- int job_hits = 0; /* How many hits the current job caused */
- pcrs_job *joblist = b->joblist;
+ joblist = b->joblist;
- if (b->dynamic) joblist = compile_dynamic_pcrs_job_list(csp, b);
+ if (b->dynamic) joblist = compile_dynamic_pcrs_job_list(csp, b);
- if (NULL == joblist)
- {
- log_error(LOG_LEVEL_RE_FILTER, "Filter %s has empty joblist. Nothing to do.", b->name);
- continue;
- }
+ if (NULL == joblist)
+ {
+ log_error(LOG_LEVEL_RE_FILTER, "Filter %s has empty joblist. Nothing to do.", b->name);
+ continue;
+ }
- prev_size = size;
- /* Apply all jobs from the joblist */
- for (job = joblist; NULL != job; job = job->next)
- {
- job_number++;
- job_hits = pcrs_execute(job, old, size, &new, &size);
+ prev_size = size;
+ /* Apply all jobs from the joblist */
+ for (job = joblist; NULL != job; job = job->next)
+ {
+ job_number++;
+ job_hits = pcrs_execute(job, old, size, &new, &size);
- if (job_hits >= 0)
- {
- /*
- * That went well. Continue filtering
- * and use the result of this job as
- * input for the next one.
- */
- current_hits += job_hits;
- if (old != csp->iob->cur)
- {
- freez(old);
- }
- old = new;
- }
- else
- {
- /*
- * This job caused an unexpected error. Inform the user
- * and skip the rest of the jobs in this filter. We could
- * continue with the next job, but usually the jobs
- * depend on each other or are similar enough to
- * fail for the same reason.
- *
- * At the moment our pcrs expects the error codes of pcre 3.4,
- * but newer pcre versions can return additional error codes.
- * As a result pcrs_strerror()'s error message might be
- * "Unknown error ...", therefore we print the numerical value
- * as well.
- *
- * XXX: Is this important enough for LOG_LEVEL_ERROR or
- * should we use LOG_LEVEL_RE_FILTER instead?
- */
- log_error(LOG_LEVEL_ERROR, "Skipped filter \'%s\' after job number %u: %s (%d)",
- b->name, job_number, pcrs_strerror(job_hits), job_hits);
- break;
- }
+ if (job_hits >= 0)
+ {
+ /*
+ * That went well. Continue filtering
+ * and use the result of this job as
+ * input for the next one.
+ */
+ current_hits += job_hits;
+ if (old != csp->iob->cur)
+ {
+ freez(old);
}
+ old = new;
+ }
+ else
+ {
+ /*
+ * This job caused an unexpected error. Inform the user
+ * and skip the rest of the jobs in this filter. We could
+ * continue with the next job, but usually the jobs
+ * depend on each other or are similar enough to
+ * fail for the same reason.
+ *
+ * At the moment our pcrs expects the error codes of pcre 3.4,
+ * but newer pcre versions can return additional error codes.
+ * As a result pcrs_strerror()'s error message might be
+ * "Unknown error ...", therefore we print the numerical value
+ * as well.
+ *
+ * XXX: Is this important enough for LOG_LEVEL_ERROR or
+ * should we use LOG_LEVEL_RE_FILTER instead?
+ */
+ log_error(LOG_LEVEL_ERROR, "Skipped filter \'%s\' after job number %u: %s (%d)",
+ b->name, job_number, pcrs_strerror(job_hits), job_hits);
+ break;
+ }
+ }
- if (b->dynamic) pcrs_free_joblist(joblist);
+ if (b->dynamic) pcrs_free_joblist(joblist);
- log_error(LOG_LEVEL_RE_FILTER,
- "filtering %s%s (size %d) with \'%s\' produced %d hits (new size %d).",
- csp->http->hostport, csp->http->path, prev_size, b->name, current_hits, size);
+ log_error(LOG_LEVEL_RE_FILTER,
+ "filtering %s%s (size %d) with \'%s\' produced %d hits (new size %d).",
+ csp->http->hostport, csp->http->path, prev_size, b->name, current_hits, size);
- hits += current_hits;
- }
- }
- }
+ hits += current_hits;
}
/*
csp->flags |= CSP_FLAG_MODIFIED;
csp->content_length = size;
- IOB_RESET(csp);
+ clear_iob(csp->iob);
return(new);
return JB_ERR_PARSE;
}
- if ((newsize += chunksize) >= *size)
+ if (chunksize >= *size - newsize)
{
- /*
- * XXX: The message is a bit confusing. Isn't the real problem that
- * the specified chunk size is greater than the number of bytes
- * left in the buffer? This probably means the connection got
- * closed prematurely. To be investigated after 3.0.17 is out.
- */
log_error(LOG_LEVEL_ERROR,
- "Chunk size %d exceeds buffer size %d in \"chunked\" transfer coding",
- chunksize, *size);
+ "Chunk size %u exceeds buffered data left. "
+ "Already digested %u of %u buffered bytes.",
+ chunksize, (unsigned int)newsize, (unsigned int)*size);
return JB_ERR_PARSE;
}
+ newsize += chunksize;
from_p += 2;
memmove(to_p, from_p, (size_t) chunksize);
break;
}
}
-
+
/* XXX: Should get its own loglevel. */
log_error(LOG_LEVEL_RE_FILTER, "De-chunking successful. Shrunk from %d to %d", *size, newsize);
return NULL;
}
- vec_count = ssplit(forward_settings, " \t", vec, SZ(vec), 1, 1);
+ vec_count = ssplit(forward_settings, " \t", vec, SZ(vec));
if ((vec_count == 2) && !strcasecmp(vec[0], "forward"))
{
fwd->type = SOCKS_NONE;
fwd->type = SOCKS_5;
socks_proxy = vec[1];
}
+ else if (!strcasecmp(vec[0], "forward-socks5t"))
+ {
+ fwd->type = SOCKS_5T;
+ socks_proxy = vec[1];
+ }
if (NULL != socks_proxy)
{
const struct forward_spec *forward_url(struct client_state *csp,
const struct http_request *http)
{
- static const struct forward_spec fwd_default[1] = { FORWARD_SPEC_INITIALIZER };
+ static const struct forward_spec fwd_default[1]; /* Zero'ed due to being static. */
struct forward_spec *fwd = csp->config->forward;
if (csp->action->flags & ACTION_FORWARD_OVERRIDE)
/*********************************************************************
*
- * Function : direct_response
+ * Function : direct_response
*
* Description : Check if Max-Forwards == 0 for an OPTIONS or TRACE
* request and if so, return a HTTP 501 to the client.
* requests properly. Still, what we do here is rfc-
* compliant, whereas ignoring or forwarding are not.
*
- * Parameters :
+ * Parameters :
* 1 : csp = Current client state (buffers, headers, etc...)
*
* Returns : http_response if a direct response was generated, NULL if nonmatch or handler fail
{
for (p = csp->headers->first; (p != NULL) ; p = p->next)
{
- if (!strncmpic("Max-Forwards:", p->str, 13))
+ if (!strncmpic(p->str, "Max-Forwards:", 13))
{
unsigned int max_forwards;
{
return cgi_error_memory();
}
-
+
if (NULL == (rsp->status = strdup("501 Not Implemented")))
{
free_http_response(rsp);
* Description : Checks whether there are any content filters
* enabled for the current request.
*
- * Parameters :
+ * Parameters :
* 1 : action = Action spec to check.
*
* Returns : TRUE for yes, FALSE otherwise