-const char filters_rcs[] = "$Id: filters.c,v 1.58.2.9 2006/01/29 23:10:56 david__schmidt Exp $";
+const char filters_rcs[] = "$Id: filters.c,v 1.70 2006/12/09 13:33:15 fabiankeil Exp $";
/*********************************************************************
*
- * File : $Source: /cvsroot/ijbswa/current/Attic/filters.c,v $
+ * File : $Source: /cvsroot/ijbswa/current/filters.c,v $
*
* Purpose : Declares functions to parse/crunch headers and pages.
* Functions declared include:
* `filter_popups', `forward_url', 'redirect_url',
* `ij_untrusted_url', `intercept_url', `pcrs_filter_respose',
* `ijb_send_banner', `trust_url', `gif_deanimate_response',
- * `jpeg_inspect_response'
+ * `jpeg_inspect_response', `execute_single_pcrs_command',
+ * `rewrite_url', `get_last_url'
*
- * Copyright : Written by and Copyright (C) 2001, 2004 the SourceForge
+ * Copyright : Written by and Copyright (C) 2001, 2004-2006 the SourceForge
* Privoxy team. http://www.privoxy.org/
*
* Based on the Internet Junkbuster originally written
*
* Revisions :
* $Log: filters.c,v $
+ * Revision 1.70 2006/12/09 13:33:15 fabiankeil
+ * Added some sanity checks for get_last_url().
+ * Fixed possible segfault caused by my last commit.
+ *
+ * Revision 1.69 2006/12/08 12:39:13 fabiankeil
+ * Let get_last_url() catch https URLs as well.
+ *
+ * Revision 1.68 2006/12/05 14:45:48 fabiankeil
+ * Make sure get_last_url() behaves like advertised
+ * and fast-redirects{} can be combined with redirect{}.
+ *
+ * Revision 1.67 2006/11/28 15:19:43 fabiankeil
+ * Implemented +redirect{s@foo@bar@} to generate
+ * a redirect based on a rewritten version of the
+ * original URL.
+ *
+ * Revision 1.66 2006/09/23 13:26:38 roro
+ * Replace TABs by spaces in source code.
+ *
+ * Revision 1.65 2006/09/21 12:54:43 fabiankeil
+ * Fix +redirect{}. Didn't work with -fast-redirects.
+ *
+ * Revision 1.64 2006/08/31 10:55:49 fabiankeil
+ * Block requests for untrusted URLs with status
+ * code 403 instead of 200.
+ *
+ * Revision 1.63 2006/08/31 10:11:28 fabiankeil
+ * Don't free p which is still in use and will be later
+ * freed by free_map(). Don't claim the referrer is unknown
+ * when the client didn't set one.
+ *
+ * Revision 1.62 2006/08/14 00:27:47 david__schmidt
+ * Feature request 595948: Re-Filter logging in single line
+ *
+ * Revision 1.61 2006/08/03 02:46:41 david__schmidt
+ * Incorporate Fabian Keil's patch work:
+ * http://www.fabiankeil.de/sourcecode/privoxy/
+ *
+ * Revision 1.60 2006/07/18 14:48:46 david__schmidt
+ * Reorganizing the repository: swapping out what was HEAD (the old 3.1 branch)
+ * with what was really the latest development (the v_3_0_branch branch)
+ *
* Revision 1.58.2.9 2006/01/29 23:10:56 david__schmidt
* Multiple filter file support
*
struct http_response *block_url(struct client_state *csp)
{
struct http_response *rsp;
+ const char *new_content_type = NULL;
/*
* If it's not blocked, don't block it ;-)
{
return NULL;
}
-
+ if (csp->action->flags & ACTION_REDIRECT)
+ {
+ log_error(LOG_LEVEL_ERROR, "redirect{} overruled by block.");
+ }
/*
* Else, prepare a response
*/
if (((csp->action->flags & ACTION_IMAGE_BLOCKER) != 0)
&& is_imageurl(csp))
{
- char *p;
+ char *p;
/* determine HOW images should be blocked */
p = csp->action->string[ACTION_STRING_IMAGE_BLOCKER];
+ if(csp->action->flags & ACTION_HANDLE_AS_EMPTY_DOCUMENT)
+ {
+ log_error(LOG_LEVEL_ERROR, "handle-as-empty-document overruled by handle-as-image.");
+ }
#if 1 /* Two alternative strategies, use this one for now: */
/* and handle accordingly: */
if ((p == NULL) || (0 == strcmpic(p, "pattern")))
{
+ rsp->status = strdup("403 Request blocked by Privoxy");
+ if (rsp->status == NULL)
+ {
+ free_http_response(rsp);
+ return cgi_error_memory();
+ }
rsp->body = bindup(image_pattern_data, image_pattern_length);
if (rsp->body == NULL)
{
else if (0 == strcmpic(p, "blank"))
{
+ rsp->status = strdup("403 Request blocked by Privoxy");
+ if (rsp->status == NULL)
+ {
+ free_http_response(rsp);
+ return cgi_error_memory();
+ }
rsp->body = bindup(image_blank_data, image_blank_length);
if (rsp->body == NULL)
{
return cgi_error_memory();
}
#endif /* Preceeding code is disabled for now */
+ }
+ else if(csp->action->flags & ACTION_HANDLE_AS_EMPTY_DOCUMENT)
+ {
+ /*
+ * Send empty document.
+ */
+ new_content_type = csp->action->string[ACTION_STRING_CONTENT_TYPE];
+
+ freez(rsp->body);
+ rsp->body = strdup(" ");
+ rsp->content_length = 1;
+
+ rsp->status = strdup("403 Request blocked by Privoxy");
+ if (rsp->status == NULL)
+ {
+ free_http_response(rsp);
+ return cgi_error_memory();
+ }
+ if (new_content_type != 0)
+ {
+ log_error(LOG_LEVEL_HEADER, "Overwriting Content-Type with %s", new_content_type);
+ if (enlist_unique_header(rsp->headers, "Content-Type", new_content_type))
+ {
+ free_http_response(rsp);
+ return cgi_error_memory();
+ }
+ }
+
}
else
#endif /* def FEATURE_IMAGE_BLOCKING */
* Function : trust_url FIXME: I should be called distrust_url
*
* Description : Calls is_untrusted_url to determine if the URL is trusted
- * and if not, returns a HTTP 304 response with a reject message.
+ * and if not, returns a HTTP 403 response with a reject message.
*
* Parameters :
* 1 : csp = Current client state (buffers, headers, etc...)
return cgi_error_memory();
}
+ rsp->status = strdup("403 Request blocked by Privoxy");
exports = default_exports(csp, NULL);
- if (exports == NULL)
+ if (exports == NULL || rsp->status == NULL)
{
free_http_response(rsp);
return cgi_error_memory();
}
else
{
- if (!err) err = map(exports, "referrer", 1, "unknown", 1);
+ if (!err) err = map(exports, "referrer", 1, "none set", 1);
}
if (err)
string_append(&p, buf);
}
err = map(exports, "trusted-referrers", 1, p, 0);
- freez(p);
if (err)
{
string_append(&p, buf);
}
err = map(exports, "trust-info", 1, p, 0);
- freez(p);
}
else
{
}
#endif /* def FEATURE_TRUST */
+/*********************************************************************
+ *
+ * Function    :  execute_single_pcrs_command
+ *
+ * Description :  Apply single pcrs command to the subject.
+ *                The subject itself is left untouched, memory for the result
+ *                is malloc()ed and it is the caller's responsibility to free
+ *                the result when it's no longer needed.
+ *
+ * Parameters  :
+ *          1  :  subject = the subject (== original) string
+ *          2  :  pcrs_command = the pcrs command as string (s@foo@bar@)
+ *          3  :  hits = int* for returning the number of modifications.
+ *                       Must not be NULL; it is reset to 0 if the command
+ *                       can't be compiled or executed.
+ *
+ * Returns     :  NULL in case of errors, otherwise the
+ *                result of the pcrs command.
+ *
+ *********************************************************************/
+char *execute_single_pcrs_command(char *subject, const char *pcrs_command, int *hits)
+{
+   int error;
+   size_t size;
+   char *result = NULL;
+   pcrs_job *job;
+
+   assert(subject);
+   assert(pcrs_command);
+   /* hits is written unconditionally below, so guard it like the others. */
+   assert(hits);
+
+   *hits = 0;
+   size = strlen(subject);
+
+   job = pcrs_compile_command(pcrs_command, &error);
+   if (NULL == job)
+   {
+      log_error(LOG_LEVEL_ERROR, "Failed to compile pcrs command \"%s\". Error: %d.",
+         pcrs_command, error);
+      return NULL;
+   }
+
+   *hits = pcrs_execute(job, subject, size, &result, &size);
+   if (*hits < 0)
+   {
+      /* A negative hit count is a pcrs error code, not a number of matches. */
+      log_error(LOG_LEVEL_ERROR, "Failed to execute pcrs command: %s", pcrs_strerror(*hits));
+      *hits = 0;
+      freez(result);
+   }
+
+   /* The compiled job is only needed for this one execution. */
+   pcrs_free_job(job);
+
+   return result;
+
+}
+
+/*********************************************************************
+ *
+ * Function    :  rewrite_url
+ *
+ * Description :  Rewrites a URL with a single pcrs command
+ *                and returns the result if it differs from the
+ *                original and isn't obviously invalid.
+ *
+ * Parameters  :
+ *          1  :  old_url = URL to rewrite.
+ *          2  :  pcrs_command = pcrs command formatted as string (s@foo@bar@)
+ *
+ * Returns     :  NULL if the pcrs_command produced no hits or if the
+ *                result starts with neither "http://" nor "https://",
+ *                otherwise the result of the modification.
+ *
+ *********************************************************************/
+char *rewrite_url(char *old_url, const char *pcrs_command)
+{
+   char *new_url = NULL;
+   int hits;
+
+   assert(old_url);
+   assert(pcrs_command);
+
+   new_url = execute_single_pcrs_command(old_url, pcrs_command, &hits);
+
+   if (hits == 0)
+   {
+      /* Nothing matched (or the command failed): discard the unchanged copy. */
+      log_error(LOG_LEVEL_REDIRECTS,
+         "pcrs command \"%s\" didn't change \"%s\".",
+         pcrs_command, old_url);
+      freez(new_url);
+   }
+   else if (strncmpic(new_url, "http://", 7) && strncmpic(new_url, "https://", 8))
+   {
+      /* Only absolute http(s) URLs are accepted as redirect targets. */
+      log_error(LOG_LEVEL_ERROR,
+         "pcrs command \"%s\" changed \"%s\" to \"%s\" (%d hi%s), "
+         "but the result doesn't look like a valid URL and will be ignored.",
+         pcrs_command, old_url, new_url, hits, (hits == 1) ? "t" : "ts");
+      freez(new_url);
+   }
+   else
+   {
+      log_error(LOG_LEVEL_REDIRECTS,
+         "pcrs command \"%s\" changed \"%s\" to \"%s\" (%d hi%s).",
+         pcrs_command, old_url, new_url, hits, (hits == 1) ? "t" : "ts");
+   }
+
+   return new_url;
+
+}
+
#ifdef FEATURE_FAST_REDIRECTS
/*********************************************************************
*
- * Function : redirect_url
+ * Function : get_last_url
*
- * Description : Checks for redirection URLs and returns a HTTP redirect
- * to the destination URL, if necessary
+ * Description : Search for the last URL inside a string.
+ * If the string already is a URL, it will
+ * be the first URL found.
*
* Parameters :
- * 1 : csp = Current client state (buffers, headers, etc...)
+ * 1 : subject = the string to check
+ * 2 : redirect_mode = +fast-redirect{} mode
*
- * Returns : NULL if URL was clean, HTTP redirect otherwise.
+ * Returns : NULL if no URL was found, or
+ * the last URL found.
*
*********************************************************************/
-struct http_response *redirect_url(struct client_state *csp)
+char *get_last_url(char *subject, const char *redirect_mode)
{
- char *p, *q;
- struct http_response *rsp;
+ char *new_url = NULL;
+ char *tmp;
+
+ assert(subject);
+ assert(redirect_mode);
+
+ /*
+ * Work on a private copy so the caller's string is never
+ * touched; the copy is released on every path below.
+ */
+ subject = strdup(subject);
+ if (subject == NULL)
+ {
+ log_error(LOG_LEVEL_ERROR, "Out of memory while searching for redirects.");
+ return NULL;
+ }
- p = q = csp->http->path;
- log_error(LOG_LEVEL_REDIRECTS, "checking path for redirects: %s", p);
+ if (0 == strcmpic(redirect_mode, "check-decoded-url"))
+ {
+ log_error(LOG_LEVEL_REDIRECTS, "Decoding \"%s\" if necessary.", subject);
+ new_url = url_decode(subject);
+ if (new_url != NULL)
+ {
+ /*
+ * Continue with the decoded copy. Note that new_url
+ * and subject point to the same string from here on.
+ */
+ freez(subject);
+ subject = new_url;
+ }
+ else
+ {
+ /* Decoding failed: keep scanning the undecoded copy. */
+ log_error(LOG_LEVEL_ERROR, "Unable to decode \"%s\".", subject);
+ }
+ }
+
+ log_error(LOG_LEVEL_REDIRECTS, "Checking \"%s\" for redirects.", subject);
/*
- * find the last URL encoded in the request
+ * Find the last URL encoded in the request
*/
- while ((p = strstr(p, "http://")) != NULL)
+ tmp = subject;
+ while ((tmp = strstr(tmp, "http://")) != NULL)
+ {
+ new_url = tmp++;
+ }
+ /*
+ * Resume the https:// scan at the last http:// match (if any),
+ * so only a https:// URL located behind it can replace it.
+ */
+ tmp = (new_url != NULL) ? new_url : subject;
+ while ((tmp = strstr(tmp, "https://")) != NULL)
+ {
+ new_url = tmp++;
+ }
+
+ /*
+ * new_url == subject does not prove a match: it is also the
+ * state left behind by the decoding step above. In that case
+ * only accept it if subject really starts with a URL scheme.
+ */
+ if ((new_url != NULL)
+ && ( (new_url != subject)
+ || (0 == strncmpic(subject, "http://", 7))
+ || (0 == strncmpic(subject, "https://", 8))
+ ))
{
- q = p++;
+ /*
+ * Return new URL if we found a redirect
+ * or if the subject already was a URL.
+ *
+ * The second case makes sure that we can
+ * chain get_last_url after another redirection check
+ * (like rewrite_url) without losing earlier redirects.
+ */
+ /*
+ * new_url points into subject, so copy it before subject
+ * is freed. If strdup() fails, NULL is returned, which
+ * reads the same as "no URL found".
+ */
+ new_url = strdup(new_url);
+ freez(subject);
+ return new_url;
}
+ freez(subject);
+ return NULL;
+
+}
+#endif /* def FEATURE_FAST_REDIRECTS */
+
+
+/*********************************************************************
+ *
+ * Function : redirect_url
+ *
+ * Description : Checks if Privoxy should answer the request with
+ * a HTTP redirect and generates the redirect if
+ * necessary.
+ *
+ * Parameters :
+ * 1 : csp = Current client state (buffers, headers, etc...)
+ *
+ * Returns : NULL if the request can pass, HTTP redirect otherwise.
+ * If memory runs out while building the redirect,
+ * the result of cgi_error_memory() is returned.
+ *
+ *********************************************************************/
+struct http_response *redirect_url(struct client_state *csp)
+{
+ struct http_response *rsp;
+#ifdef FEATURE_FAST_REDIRECTS
/*
- * if there was any, generate and return a HTTP redirect
+ * XXX: Do we still need FEATURE_FAST_REDIRECTS
+ * as compile-time option? The user can easily disable
+ * it in his action file.
*/
- if (q != csp->http->path)
+ char * redirect_mode;
+#endif /* def FEATURE_FAST_REDIRECTS */
+ char *old_url = NULL;
+ char *new_url = NULL;
+ char *redirection_string;
+
+ if ((csp->action->flags & ACTION_REDIRECT))
{
- log_error(LOG_LEVEL_REDIRECTS, "redirecting to: %s", q);
+ redirection_string = csp->action->string[ACTION_STRING_REDIRECT];
- if (NULL == (rsp = alloc_http_response()))
+ /*
+ * If the redirection string begins with 's',
+ * assume it's a pcrs command, otherwise treat it as
+ * properly formatted URL and use it for the redirection
+ * directly.
+ *
+ * According to RFC 2616 section 14.30 the URL
+ * has to be absolute and if the user tries:
+ * +redirect{shit/this/will/be/parsed/as/pcrs_command.html}
+ * she would get undefined results anyway.
+ *
+ */
+
+ if (*redirection_string == 's')
{
- return cgi_error_memory();
+ /* old_url only borrows csp's string here; it must not be freed. */
+ old_url = csp->http->url;
+ new_url = rewrite_url(old_url, redirection_string);
}
-
- if ( enlist_unique_header(rsp->headers, "Location", q)
- || (NULL == (rsp->status = strdup("302 Local Redirect from Privoxy"))) )
+ else
{
- free_http_response(rsp);
- return cgi_error_memory();
+ log_error(LOG_LEVEL_REDIRECTS,
+ "No pcrs command recognized, assuming that \"%s\" is already properly formatted.",
+ redirection_string);
+ new_url = strdup(redirection_string);
+ if (new_url == NULL)
+ {
+ /* Don't silently drop the configured redirect on allocation failure. */
+ return cgi_error_memory();
+ }
}
+ }
- return finish_http_response(rsp);
+#ifdef FEATURE_FAST_REDIRECTS
+ if ((csp->action->flags & ACTION_FAST_REDIRECTS))
+ {
+ redirect_mode = csp->action->string[ACTION_STRING_FAST_REDIRECTS];
+
+ /*
+ * If it exists, use the previously rewritten URL as input
+ * otherwise just use the old path.
+ */
+ old_url = new_url ? new_url : strdup(csp->http->path);
+ if (old_url == NULL)
+ {
+ /*
+ * strdup() failed. get_last_url() requires a non-NULL
+ * subject, so bail out instead of passing NULL on.
+ */
+ log_error(LOG_LEVEL_ERROR, "Out of memory while searching for redirects.");
+ return cgi_error_memory();
+ }
+ new_url = get_last_url(old_url, redirect_mode);
+ /* old_url is heap-allocated on both paths above and no longer needed. */
+ freez(old_url);
}
- else
+#endif /* def FEATURE_FAST_REDIRECTS */
+
+ /* Did any redirect action trigger? */
+ if (new_url)
{
- return NULL;
+ if (0 == strcmpic(new_url, csp->http->url))
+ {
+ log_error(LOG_LEVEL_ERROR,
+ "New URL \"%s\" and old URL \"%s\" are the same. Redirection loop prevented.",
+ csp->http->url, new_url);
+ freez(new_url);
+ }
+ else
+ {
+ log_error(LOG_LEVEL_REDIRECTS, "New URL is: %s", new_url);
+
+ if (NULL == (rsp = alloc_http_response()))
+ {
+ freez(new_url);
+ return cgi_error_memory();
+ }
+
+ if ( enlist_unique_header(rsp->headers, "Location", new_url)
+ || (NULL == (rsp->status = strdup("302 Local Redirect from Privoxy"))) )
+ {
+ freez(new_url);
+ free_http_response(rsp);
+ return cgi_error_memory();
+ }
+ freez(new_url);
+ return finish_http_response(rsp);
+ }
}
+ /* Only reached if no redirect is required */
+ return NULL;
+
}
-#endif /* def FEATURE_FAST_REDIRECTS */
#ifdef FEATURE_IMAGE_BLOCKING
char *pcrs_filter_response(struct client_state *csp)
{
int hits=0;
- size_t size;
+ size_t size, prev_size;
char *old = csp->iob->cur, *new = NULL;
pcrs_job *job;
{
fl = csp->rlist[i];
if ((NULL == fl) || (NULL == fl->f))
- break;
+ {
+ /*
+ * Either there are no filter files
+ * left, or this filter file just
+ * contains no valid filters.
+ *
+ * Continue to be sure we don't miss
+ * valid filter files that are chained
+ * after empty or invalid ones.
+ */
+ continue;
+ }
/*
* For all applying +filter actions, look if a filter by that
* name exists and if yes, execute it's pcrs_joblist on the
continue;
}
- log_error(LOG_LEVEL_RE_FILTER, "re_filtering %s%s (size %d) with filter %s...",
- csp->http->hostport, csp->http->path, size, b->name);
-
+ prev_size = size;
/* Apply all jobs from the joblist */
for (job = b->joblist; NULL != job; job = job->next)
{
old=new;
}
- log_error(LOG_LEVEL_RE_FILTER, " ...produced %d hits (new size %d).", current_hits, size);
+ log_error(LOG_LEVEL_RE_FILTER, "re_filtering %s%s (size %d) with filter %s produced %d hits (new size %d).",
+ csp->http->hostport, csp->http->path, prev_size, b->name, current_hits, size);
+
hits += current_hits;
}
}