-const char filters_rcs[] = "$Id: filters.c,v 1.152 2011/10/30 16:18:12 fabiankeil Exp $";
+const char filters_rcs[] = "$Id: filters.c,v 1.171 2012/03/18 13:47:33 fabiankeil Exp $";
/*********************************************************************
*
* File : $Source: /cvsroot/ijbswa/current/filters.c,v $
*
* Purpose : Declares functions to parse/crunch headers and pages.
- * Functions declared include:
- * `acl_addr', `add_stats', `block_acl', `block_imageurl',
- * `block_url', `url_actions', `domain_split',
- * `filter_popups', `forward_url', 'redirect_url',
- * `ij_untrusted_url', `intercept_url', `pcrs_filter_respose',
- * `ijb_send_banner', `trust_url', `gif_deanimate_response',
- * `execute_single_pcrs_command', `rewrite_url',
- * `get_last_url'
- *
- * Copyright : Written by and Copyright (C) 2001-2010 the
+ *
+ * Copyright : Written by and Copyright (C) 2001-2011 the
* Privoxy team. http://www.privoxy.org/
*
* Based on the Internet Junkbuster originally written
#include "urlmatch.h"
#include "loaders.h"
-#ifdef HAVE_STRTOK
-/* Only used for locks */
-#include "jcc.h"
-#endif /* def HAVE_STRTOK */
-
#ifdef _WIN32
#include "win32.h"
#endif
const char filters_h_rcs[] = FILTERS_H_VERSION;
-/* Fix a problem with Solaris. There should be no effect on other
- * platforms.
- * Solaris's isspace() is a macro which uses it's argument directly
- * as an array index. Therefore we need to make sure that high-bit
- * characters generate +ve values, and ideally we also want to make
- * the argument match the declared parameter type of "int".
- */
-#define ijb_isdigit(__X) isdigit((int)(unsigned char)(__X))
-
typedef char *(*filter_function_ptr)();
static filter_function_ptr get_filter_function(const struct client_state *csp);
static jb_err remove_chunked_transfer_coding(char *buffer, size_t *size);
if (network->ss_family != netmask->ss_family)
{
/* This should never happen */
- log_error(LOG_LEVEL_ERROR,
- "Internal error at %s:%llu: network and netmask differ in family",
- __FILE__, __LINE__);
- return 0;
+ assert(network->ss_family == netmask->ss_family);
+ log_error(LOG_LEVEL_FATAL, "Network and netmask differ in family.");
}
sockaddr_storage_to_ip(network, &network_addr, &addr_len, &network_port);
if ((p = strchr(acl_spec, '/')) != NULL)
{
*p++ = '\0';
- if (ijb_isdigit(*p) == 0)
+ if (privoxy_isdigit(*p) == 0)
{
freez(acl_spec);
return(-1);
/* determine HOW images should be blocked */
p = csp->action->string[ACTION_STRING_IMAGE_BLOCKER];
- if(csp->action->flags & ACTION_HANDLE_AS_EMPTY_DOCUMENT)
+ if (csp->action->flags & ACTION_HANDLE_AS_EMPTY_DOCUMENT)
{
log_error(LOG_LEVEL_ERROR, "handle-as-empty-document overruled by handle-as-image.");
}
}
else
#endif /* def FEATURE_IMAGE_BLOCKING */
- if(csp->action->flags & ACTION_HANDLE_AS_EMPTY_DOCUMENT)
+ if (csp->action->flags & ACTION_HANDLE_AS_EMPTY_DOCUMENT)
{
/*
* Send empty document.
return NULL;
}
- if (0 == strcmpic(redirect_mode, "check-decoded-url"))
+ if (0 == strcmpic(redirect_mode, "check-decoded-url") && strchr(subject, '%'))
{
log_error(LOG_LEVEL_REDIRECTS,
"Checking \"%s\" for encoded redirects.", subject);
-#if defined(MUTEX_LOCKS_AVAILABLE) && defined(HAVE_STRTOK)
/*
* Check each parameter in the URL separately.
* Sectionize the URL at "?" and "&",
- * then URL-decode each component,
+ * go backwards through the segments, URL-decode them
* and look for a URL in the decoded result.
- * Keep the last one we spot.
+ * Stop the search after the first match.
*/
- char *found = NULL;
+ char *url_segment = NULL;
+ /*
+ * XXX: This estimate is guaranteed to be high enough as we
+ * let ssplit() ignore empty fields, but also a bit wasteful.
+ */
+ size_t max_segments = strlen(subject) / 2;
+ char **url_segments = malloc(max_segments * sizeof(char *));
+ int segments;
+
+ if (NULL == url_segments)
+ {
+ log_error(LOG_LEVEL_ERROR,
+ "Out of memory while decoding URL: %s", subject);
+ freez(subject);
+ return NULL;
+ }
- privoxy_mutex_lock(&strtok_mutex);
- char *token = strtok(subject, "?&");
- while (token)
+ segments = ssplit(subject, "?&", url_segments, max_segments);
+
+ while (segments-- > 0)
{
- char *dtoken = url_decode(token);
+ char *dtoken = url_decode(url_segments[segments]);
if (NULL == dtoken)
{
- log_error(LOG_LEVEL_ERROR, "Unable to decode \"%s\".", token);
+ log_error(LOG_LEVEL_ERROR, "Unable to decode \"%s\".", url_segments[segments]);
continue;
}
- char *h1 = strstr(dtoken, "http://");
- char *h2 = strstr(dtoken, "https://");
- char *h = (h1 && h2
- ? (h1 < h2 ? h1 : h2)
- : (h1 ? h1 : h2));
- if (h)
+ url_segment = strstr(dtoken, "http://");
+ if (NULL == url_segment)
{
- freez(found);
- found = strdup(h);
- if (found == NULL)
+ url_segment = strstr(dtoken, "https://");
+ }
+ if (NULL != url_segment)
+ {
+ url_segment = strdup(url_segment);
+ freez(dtoken);
+ if (url_segment == NULL)
{
log_error(LOG_LEVEL_ERROR,
"Out of memory while searching for redirects.");
- privoxy_mutex_unlock(&strtok_mutex);
return NULL;
}
+ break;
}
freez(dtoken);
- token = strtok(NULL, "?&");
}
- privoxy_mutex_unlock(&strtok_mutex);
freez(subject);
+ freez(url_segments);
- return found;
-#else
- new_url = url_decode(subject);
- if (new_url != NULL)
+ if (url_segment == NULL)
{
- freez(subject);
- subject = new_url;
+ return NULL;
}
- else
- {
- log_error(LOG_LEVEL_ERROR, "Unable to decode \"%s\".", subject);
- }
-#endif /* defined(MUTEX_LOCKS_AVAILABLE) && defined(HAVE_STRTOK) */
+ subject = url_segment;
+ }
+ else
+ {
+ /* Look for a URL inside this one, without decoding anything. */
+ log_error(LOG_LEVEL_REDIRECTS,
+ "Checking \"%s\" for unencoded redirects.", subject);
}
-
- /* Else, just look for a URL inside this one, without decoding anything. */
-
- log_error(LOG_LEVEL_REDIRECTS,
- "Checking \"%s\" for unencoded redirects.", subject);
/*
* Find the last URL encoded in the request
/* Did any redirect action trigger? */
if (new_url)
{
+ if (url_requires_percent_encoding(new_url))
+ {
+ char *encoded_url;
+ log_error(LOG_LEVEL_REDIRECTS, "Percent-encoding redirect URL: %N",
+ strlen(new_url), new_url);
+ encoded_url = percent_encode_url(new_url);
+ freez(new_url);
+ if (encoded_url == NULL)
+ {
+ return cgi_error_memory();
+ }
+ new_url = encoded_url;
+ assert(FALSE == url_requires_percent_encoding(new_url));
+ }
+
if (0 == strcmpic(new_url, csp->http->url))
{
log_error(LOG_LEVEL_ERROR,
return cgi_error_memory();
}
- if ( enlist_unique_header(rsp->headers, "Location", new_url)
- || (NULL == (rsp->status = strdup("302 Local Redirect from Privoxy"))) )
+ if (enlist_unique_header(rsp->headers, "Location", new_url)
+ || (NULL == (rsp->status = strdup("302 Local Redirect from Privoxy"))))
{
freez(new_url);
free_http_response(rsp);
return JB_ERR_PARSE;
}
- if ((newsize += chunksize) >= *size)
+ if (chunksize >= *size - newsize)
{
- /*
- * XXX: The message is a bit confusing. Isn't the real problem that
- * the specified chunk size is greater than the number of bytes
- * left in the buffer? This probably means the connection got
- * closed prematurely. To be investigated after 3.0.17 is out.
- */
log_error(LOG_LEVEL_ERROR,
- "Chunk size %d exceeds buffer size %d in \"chunked\" transfer coding",
- chunksize, *size);
+ "Chunk size %u exceeds buffered data left. "
+ "Already digested %u of %u buffered bytes.",
+ chunksize, (unsigned int)newsize, (unsigned int)*size);
return JB_ERR_PARSE;
}
+ newsize += chunksize;
from_p += 2;
memmove(to_p, from_p, (size_t) chunksize);
return NULL;
}
- vec_count = ssplit(forward_settings, " \t", vec, SZ(vec), 1, 1);
+ vec_count = ssplit(forward_settings, " \t", vec, SZ(vec));
if ((vec_count == 2) && !strcasecmp(vec[0], "forward"))
{
fwd->type = SOCKS_NONE;