-const char parsers_rcs[] = "$Id: parsers.c,v 1.93 2007/03/20 15:21:44 fabiankeil Exp $";
+const char parsers_rcs[] = "$Id: parsers.c,v 1.102 2007/05/27 12:39:32 fabiankeil Exp $";
/*********************************************************************
*
* File : $Source: /cvsroot/ijbswa/current/parsers.c,v $
*
* Revisions :
* $Log: parsers.c,v $
+ * Revision 1.102 2007/05/27 12:39:32 fabiankeil
+ * Adjust "X-Filter: No" to disable dedicated header filters.
+ *
+ * Revision 1.101 2007/05/14 10:16:41 fabiankeil
+ * Streamline client_cookie_adder().
+ *
+ * Revision 1.100 2007/04/30 15:53:11 fabiankeil
+ * Make sure filters with dynamic jobs actually use them.
+ *
+ * Revision 1.99 2007/04/30 15:06:26 fabiankeil
+ * - Introduce dynamic pcrs jobs that can resolve variables.
+ * - Remove unnecessary update_action_bits_for_all_tags() call.
+ *
+ * Revision 1.98 2007/04/17 18:32:10 fabiankeil
+ * - Make tagging based on tags set by earlier taggers
+ * of the same kind possible.
+ * - Log whether or not new tags cause action bits updates
+ * (in which case a matching tag-pattern section exists).
+ * - Log if the user tries to set a tag that is already set.
+ *
+ * Revision 1.97 2007/04/15 16:39:21 fabiankeil
+ * Introduce tags as alternative way to specify which
+ * actions apply to a request. At the moment tags can be
+ * created based on client and server headers.
+ *
+ * Revision 1.96 2007/04/12 12:53:58 fabiankeil
+ * Log a warning if the content is compressed, filtering is
+ * enabled and Privoxy was compiled without zlib support.
+ * Closes FR#1673938.
+ *
+ * Revision 1.95 2007/03/25 14:26:40 fabiankeil
+ * - Fix warnings when compiled with glibc.
+ * - Don't use crumble() for cookie crunching.
+ * - Move cookie time parsing into parse_header_time().
+ * - Let parse_header_time() return a jb_err code
+ * instead of a pointer that can only be used to
+ * check for NULL anyway.
+ *
+ * Revision 1.94 2007/03/21 12:23:53 fabiankeil
+ * - Add better protection against malicious gzip headers.
+ * - Stop logging the first hundred bytes of decompressed content.
+ * It looks like it's working and there is always debug 16.
+ * - Log the content size after decompression in decompress_iob()
+ * instead of pcrs_filter_response().
+ *
* Revision 1.93 2007/03/20 15:21:44 fabiankeil
* - Use dedicated header filter actions instead of abusing "filter".
* Replace "filter-client-headers" and "filter-client-headers"
#include <ctype.h>
#include <assert.h>
#include <string.h>
+
+#ifdef __GLIBC__
+/*
+ * Convince GNU's libc to provide a strptime prototype.
+ */
+#define __USE_XOPEN
+#endif /*__GLIBC__ */
#include <time.h>
#ifdef FEATURE_ZLIB
#include "jbsockets.h"
#include "miscutil.h"
#include "list.h"
+#include "actions.h"
+#include "filters.h"
#ifndef HAVE_STRPTIME
#include "strptime.h"
/* Fix a problem with Solaris. There should be no effect on other
* platforms.
- * Solaris's isspace() is a macro which uses it's argument directly
+ * Solaris's isspace() is a macro which uses its argument directly
* as an array index. Therefore we need to make sure that high-bit
* characters generate +ve values, and ideally we also want to make
* the argument match the declared parameter type of "int".
#define ijb_isupper(__X) isupper((int)(unsigned char)(__X))
#define ijb_tolower(__X) tolower((int)(unsigned char)(__X))
+jb_err header_tagger(struct client_state *csp, char *header);
+jb_err scan_headers(struct client_state *csp);
const struct parsers client_patterns[] = {
{ "referer:", 8, client_referrer },
}
+
+/*********************************************************************
+ *
+ * Function    :  scan_headers
+ *
+ * Description :  Walks the current header list, logs every header
+ *                that still has content and passes it on to
+ *                header_tagger(), which applies tags and updates
+ *                the action bits.
+ *
+ * Parameters  :
+ *          1  :  csp = Current client state (buffers, headers, etc...)
+ *
+ * Returns     :  The result of the last header_tagger() call, or
+ *                JB_ERR_OK if no header was handed to it. Scanning
+ *                stops at the first result other than JB_ERR_OK.
+ *
+ *********************************************************************/
+jb_err scan_headers(struct client_state *csp)
+{
+   jb_err status = JB_ERR_OK;
+   struct list_entry *entry;
+
+   log_error(LOG_LEVEL_HEADER, "scanning headers for: %s", csp->http->url);
+
+   entry = csp->headers->first;
+   while ((NULL != entry) && (JB_ERR_OK == status))
+   {
+      /* Entries without a string were crunch()ed in a previous run. */
+      if (NULL != entry->str)
+      {
+         log_error(LOG_LEVEL_HEADER, "scan: %s", entry->str);
+         status = header_tagger(csp, entry->str);
+      }
+      entry = entry->next;
+   }
+
+   return status;
+}
+
+
/*********************************************************************
*
* Function : sed
* headers (client or server)
* 3 : csp = Current client state (buffers, headers, etc...)
*
- * Returns : Single pointer to a fully formed header, or NULL
- * on out-of-memory error.
+ * Returns : JB_ERR_OK in case of success, or
+ * JB_ERR_MEMORY on out-of-memory error.
*
*********************************************************************/
-char *sed(const struct parsers pats[],
- const add_header_func_ptr more_headers[],
- struct client_state *csp)
+jb_err sed(const struct parsers pats[],
+ const add_header_func_ptr more_headers[],
+ struct client_state *csp)
{
struct list_entry *p;
const struct parsers *v;
if (first_run) /* Parse and print */
{
- log_error(LOG_LEVEL_HEADER, "scanning headers for: %s", csp->http->url);
+ scan_headers(csp);
+
for (v = pats; (err == JB_ERR_OK) && (v->str != NULL) ; v++)
{
for (p = csp->headers->first; (err == JB_ERR_OK) && (p != NULL) ; p = p->next)
/* Header crunch()ed in previous run? -> ignore */
if (p->str == NULL) continue;
- if (v == pats) log_error(LOG_LEVEL_HEADER, "scan: %s", p->str);
-
/* Does the current parser handle this header? */
if ((strncmpic(p->str, v->str, v->len) == 0) || (v->len == CHECK_EVERY_HEADER_REMAINING))
{
}
}
- if (err != JB_ERR_OK)
+ return err;
+}
+
+
+
+/*********************************************************************
+ *
+ * Function    :  header_tagger
+ *
+ * Description :  Runs the pcrs job lists of all enabled taggers on a
+ *                single header. If a tagger modifies the header, the
+ *                result is stored as a tag in csp->tags (unless it is
+ *                empty or already present) and the action bits are
+ *                updated for it right away.
+ *
+ *                Server header taggers are used once
+ *                CSP_FLAG_CLIENT_HEADER_PARSING_DONE is set in
+ *                csp->flags, client header taggers otherwise.
+ *
+ *                XXX: Shares enough code with filter_header() and
+ *                pcrs_filter_response() to warrant some helper functions.
+ *
+ * Parameters  :
+ *          1  :  csp = Current client state (buffers, headers, etc...)
+ *          2  :  header = Header that is used as tagger input.
+ *                         It is only read, never modified or freed.
+ *
+ * Returns     :  JB_ERR_OK, always. Regex failures and failures to
+ *                store a tag are logged and otherwise ignored.
+ *
+ *********************************************************************/
+jb_err header_tagger(struct client_state *csp, char *header)
+{
+   int wanted_filter_type;
+   int multi_action_index;
+   int i;
+   pcrs_job *job;
+
+   struct file_list *fl;
+   struct re_filterfile_spec *b;
+   struct list_entry *tag_name;
+
+   int found_filters = 0;
+   const size_t header_length = strlen(header);
+
+   /* Pick the tagger kind that matches the current parsing stage. */
+   if (csp->flags & CSP_FLAG_CLIENT_HEADER_PARSING_DONE)
+   {
+      wanted_filter_type = FT_SERVER_HEADER_TAGGER;
+      multi_action_index = ACTION_MULTI_SERVER_HEADER_TAGGER;
+   }
+   else
+   {
+      wanted_filter_type = FT_CLIENT_HEADER_TAGGER;
+      multi_action_index = ACTION_MULTI_CLIENT_HEADER_TAGGER;
+   }
+
+   /* Check if there are any filters */
+   for (i = 0; i < MAX_AF_FILES; i++)
+   {
+      fl = csp->rlist[i];
+      if (NULL != fl)
+      {
+         if (NULL != fl->f)
+         {
+            found_filters = 1;
+            break;
+         }
+      }
+   }
+
+   if (0 == found_filters)
{
- return NULL;
+      log_error(LOG_LEVEL_ERROR, "Unable to get current state of regex tagging.");
+      return(JB_ERR_OK);
}
- return list_to_text(csp->headers);
-}
+   for (i = 0; i < MAX_AF_FILES; i++)
+   {
+      fl = csp->rlist[i];
+      if ((NULL == fl) || (NULL == fl->f))
+      {
+         /*
+          * Either there are no filter files
+          * left, or this filter file just
+          * contains no valid filters.
+          *
+          * Continue to be sure we don't miss
+          * valid filter files that are chained
+          * after empty or invalid ones.
+          */
+         continue;
+      }
+      /* For all filters, */
+      for (b = fl->f; b; b = b->next)
+      {
+         if (b->type != wanted_filter_type)
+         {
+            /* skip the ones we don't care about, */
+            continue;
+         }
+         /* leaving only taggers that could apply, of which we use the ones, */
+         for (tag_name = csp->action->multi[multi_action_index]->first;
+              NULL != tag_name; tag_name = tag_name->next)
+         {
+            /* that do apply, and */
+            if (strcmp(b->name, tag_name->str) == 0)
+            {
+               char *modified_tag = NULL;
+               char *tag = header;
+               size_t size = header_length;
+               pcrs_job *joblist = b->joblist;
+
+               if (b->dynamic) joblist = compile_dynamic_pcrs_job_list(csp, b);
+
+               if (NULL == joblist)
+               {
+                  log_error(LOG_LEVEL_RE_FILTER,
+                     "Tagger %s has empty joblist. Nothing to do.", b->name);
+                  continue;
+               }
+
+               /* execute their pcrs_joblist on the header. */
+               for (job = joblist; NULL != job; job = job->next)
+               {
+                  const int hits = pcrs_execute(job, tag, size, &modified_tag, &size);
+
+                  if (0 < hits)
+                  {
+                     /* Success, continue with the modified version. */
+                     if (tag != header)
+                     {
+                        /* Only intermediate results are ours to free. */
+                        freez(tag);
+                     }
+                     tag = modified_tag;
+                     /*
+                      * tag owns the buffer now. Drop the alias so the
+                      * freez(modified_tag) in the failure path below
+                      * can never free it, even if a later
+                      * pcrs_execute() call leaves modified_tag
+                      * untouched.
+                      */
+                     modified_tag = NULL;
+                  }
+                  else
+                  {
+                     /* Tagger doesn't match */
+                     if (0 > hits)
+                     {
+                        /*
+                         * Regex failure, log it but continue anyway.
+                         * Note that header (not *header) is what
+                         * the %s conversion needs.
+                         */
+                        log_error(LOG_LEVEL_ERROR,
+                           "Problems with tagger \'%s\' and header \'%s\': %s",
+                           b->name, header, pcrs_strerror(hits));
+                     }
+                     freez(modified_tag);
+                  }
+               }
+
+               /* Dynamic job lists were compiled for this call only. */
+               if (b->dynamic) pcrs_free_joblist(joblist);
+
+               /* If this tagger matched */
+               if (tag != header)
+               {
+                  if (0 == size)
+                  {
+                     /*
+                      * There is no technical limitation which makes
+                      * it impossible to use empty tags, but I assume
+                      * no one would do it intentionally.
+                      */
+                     freez(tag);
+                     log_error(LOG_LEVEL_INFO,
+                        "Tagger \'%s\' created an empty tag. Ignored.",
+                        b->name);
+                     continue;
+                  }
+
+                  if (!list_contains_item(csp->tags, tag))
+                  {
+                     if (JB_ERR_OK != enlist(csp->tags, tag))
+                     {
+                        log_error(LOG_LEVEL_ERROR,
+                           "Insufficient memory to add tag \'%s\', "
+                           "based on tagger \'%s\' and header \'%s\'",
+                           tag, b->name, header);
+                     }
+                     else
+                     {
+                        char *action_message;
+                        /*
+                         * update the action bits right away, to make
+                         * tagging based on tags set by earlier taggers
+                         * of the same kind possible.
+                         */
+                        if (update_action_bits_for_tag(csp, tag))
+                        {
+                           action_message = "Action bits updated accordingly.";
+                        }
+                        else
+                        {
+                           action_message = "No action bits update necessary.";
+                        }
+
+                        log_error(LOG_LEVEL_HEADER,
+                           "Tagger \'%s\' added tag \'%s\'. %s",
+                           b->name, tag, action_message);
+                     }
+                  }
+                  else
+                  {
+                     /* XXX: Is this log-worthy? */
+                     log_error(LOG_LEVEL_HEADER,
+                        "Tagger \'%s\' didn't add tag \'%s\'. "
+                        "Tag already present", b->name, tag);
+                  }
+                  freez(tag);
+               } /* if the tagger matched */
+            } /* if the tagger applies */
+         } /* for every tagger that could apply */
+      } /* for all filters */
+   } /* for all filter files */
+
+   return JB_ERR_OK;
+}
/* here begins the family of parser functions that reformat header lines */
int wanted_filter_type;
int multi_action_index;
+ if (csp->flags & CSP_FLAG_NO_FILTERING)
+ {
+ return JB_ERR_OK;
+ }
+
if (csp->flags & CSP_FLAG_CLIENT_HEADER_PARSING_DONE)
{
wanted_filter_type = FT_SERVER_HEADER_FILTER;
if (strcmp(b->name, filtername->str) == 0)
{
int current_hits = 0;
+ pcrs_job *joblist = b->joblist;
+
+ if (b->dynamic) joblist = compile_dynamic_pcrs_job_list(csp, b);
- if ( NULL == b->joblist )
+ if (NULL == joblist)
{
log_error(LOG_LEVEL_RE_FILTER, "Filter %s has empty joblist. Nothing to do.", b->name);
continue;
*header, size, b->name);
/* Apply all jobs from the joblist */
- for (job = b->joblist; NULL != job; job = job->next)
+ for (job = joblist; NULL != job; job = job->next)
{
matches = pcrs_execute(job, *header, size, &newheader, &size);
if ( 0 < matches )
}
}
}
+
+ if (b->dynamic) pcrs_free_joblist(joblist);
+
log_error(LOG_LEVEL_RE_FILTER, "... produced %d hits (new size %d).", current_hits, size);
hits += current_hits;
}
* Body is compressed, turn off pcrs and gif filtering.
*/
csp->content_type |= CT_TABOO;
+
+ /*
+ * Log a warning if the user expects the content to be filtered.
+ */
+ if ((csp->rlist != NULL) &&
+ (!list_is_empty(csp->action->multi[ACTION_MULTI_FILTER])))
+ {
+ log_error(LOG_LEVEL_INFO,
+ "Compressed content detected, content filtering disabled. "
+ "Consider recompiling Privoxy with zlib support or "
+ "enable the prevent-compression action.");
+ }
}
-#endif /* !defined(FEATURE_ZLIB) */
+#endif /* defined(FEATURE_ZLIB) */
return JB_ERR_OK;
}
else if (0 == strcmpic(newval, "randomize"))
{
+ const char *header_time = *header + sizeof("Last-Modified:");
+
log_error(LOG_LEVEL_HEADER, "Randomizing: %s", *header);
now = time(NULL);
#ifdef HAVE_GMTIME_R
#else
timeptr = gmtime(&now);
#endif
- if ((timeptr = parse_header_time(*header, &last_modified)) == NULL)
+ if (JB_ERR_OK != parse_header_time(header_time, &last_modified))
{
- log_error(LOG_LEVEL_HEADER, "Couldn't parse: %s (crunching!)", *header);
+ log_error(LOG_LEVEL_HEADER, "Couldn't parse: %s in %s (crunching!)", header_time, *header);
freez(*header);
}
else
}
else /* add random value */
{
- if ((timeptr = parse_header_time(*header, &tm)) == NULL)
+ const char *header_time = *header + sizeof("If-Modified-Since:");
+
+ if (JB_ERR_OK != parse_header_time(header_time, &tm))
{
- log_error(LOG_LEVEL_HEADER, "Couldn't parse: %s (crunching!)", *header);
+ log_error(LOG_LEVEL_HEADER, "Couldn't parse: %s in %s (crunching!)", header_time, *header);
freez(*header);
}
else
}
else
{
- csp->content_type = CT_TABOO;
- csp->action->flags &= ~ACTION_FILTER_SERVER_HEADERS;
- csp->action->flags &= ~ACTION_FILTER_CLIENT_HEADERS;
+ csp->content_type = CT_TABOO; /* XXX: This hack shouldn't be necessary */
+ csp->flags |= CSP_FLAG_NO_FILTERING;
log_error(LOG_LEVEL_HEADER, "Accepted the client's request to fetch without filtering.");
}
log_error(LOG_LEVEL_HEADER, "Crunching %s", *header);
*
* Function : client_cookie_adder
*
- * Description : Used in the add_client_headers list. Called from `sed'.
- *
- * XXX: Remove csp->cookie_list which is no longer used.
+ * Description : Used in the add_client_headers list to add "wafers".
+ * Called from `sed'.
*
* Parameters :
* 1 : csp = Current client state (buffers, headers, etc...)
*********************************************************************/
jb_err client_cookie_adder(struct client_state *csp)
{
- struct list_entry *lst;
char *tmp;
- struct list_entry *list1 = csp->cookie_list->first;
- struct list_entry *list2 = csp->action->multi[ACTION_MULTI_WAFER]->first;
- int first_cookie = 1;
+ struct list_entry *wafer;
+ struct list_entry *wafer_list = csp->action->multi[ACTION_MULTI_WAFER]->first;
jb_err err;
- if ((list1 == NULL) && (list2 == NULL))
+ if (NULL == wafer_list)
{
/* Nothing to do */
return JB_ERR_OK;
tmp = strdup("Cookie: ");
- for (lst = list1; lst ; lst = lst->next)
+ for (wafer = wafer_list; (NULL != tmp) && (NULL != wafer); wafer = wafer->next)
{
- if (first_cookie)
- {
- first_cookie = 0;
- }
- else
+ if (wafer != wafer_list)
{
+ /* As this isn't the first wafer, we need a delimiter. */
string_append(&tmp, "; ");
}
- string_append(&tmp, lst->str);
- }
-
- for (lst = list2; lst ; lst = lst->next)
- {
- if (first_cookie)
- {
- first_cookie = 0;
- }
- else
- {
- string_append(&tmp, "; ");
- }
- string_join(&tmp, cookie_encode(lst->str));
+ string_join(&tmp, cookie_encode(wafer->str));
}
if (tmp == NULL)
*********************************************************************/
jb_err server_http(struct client_state *csp, char **header)
{
- /* Signal that were now parsing server headers. */
- csp->flags |= CSP_FLAG_CLIENT_HEADER_PARSING_DONE;
-
sscanf(*header, "HTTP/%*d.%*d %d", &(csp->http->status));
if (csp->http->status == 206)
{
time_t now;
time_t cookie_time;
struct tm tm_now;
- struct tm tm_cookie;
time(&now);
#ifdef FEATURE_COOKIE_JAR
if ((csp->action->flags & ACTION_NO_COOKIE_SET) != 0)
{
- log_error(LOG_LEVEL_HEADER, "Crunched incoming cookie -- yum!");
- return crumble(csp, header);
+ log_error(LOG_LEVEL_HEADER, "Crunching incoming cookie: %s", *header);
+ freez(*header);
}
else if ((csp->action->flags & ACTION_NO_COOKIE_KEEP) != 0)
{
*/
if ((strncmpic(cur_tag, "expires=", 8) == 0) && *(cur_tag + 8))
{
- char *match;
- const char *expiration_date = cur_tag + 8; /* Skip "[Ee]xpires=" */
- memset(&tm_cookie, 0, sizeof(tm_cookie));
- /*
- * Try the valid time formats we know about.
- *
- * XXX: This should be moved to parse_header_time().
- *
- * XXX: Maybe the log messages should be removed
- * for the next stable release. They just exist to
- * see which time format gets the most hits and
- * should be checked for first.
- */
- if (NULL != (match = strptime(expiration_date, "%a, %e-%b-%y %H:%M:%S ", &tm_cookie)))
- {
- /* 22-Feb-2008 12:01:18 GMT */
- log_error(LOG_LEVEL_HEADER,
- "cookie \'%s\' send by %s appears to be using time format 1.",
- *header, csp->http->url);
- }
- else if (NULL != (match = strptime(expiration_date, "%A, %e-%b-%Y %H:%M:%S ", &tm_cookie)))
- {
- /* Tue, 02-Jun-2037 20:00:00 GMT */
- log_error(LOG_LEVEL_HEADER,
- "cookie \'%s\' send by %s appears to be using time format 2.",
- *header, csp->http->url);
- }
- else if (NULL != (match = strptime(expiration_date, "%a, %e-%b-%Y %H:%M:%S ", &tm_cookie)))
- {
- /* Tuesday, 02-Jun-2037 20:00:00 GMT */
- /*
- * On FreeBSD this is never reached because it's handled
- * by "format 2" as well. I am, however, not sure if all
- * strptime() implementations behave that way.
- */
- log_error(LOG_LEVEL_HEADER,
- "cookie \'%s\' send by %s appears to be using time format 3.",
- *header, csp->http->url);
- }
- else if (NULL != (match = strptime(expiration_date, "%a, %e %b %Y %H:%M:%S ", &tm_cookie)))
- {
- /* Fri, 22 Feb 2008 19:20:05 GMT */
- log_error(LOG_LEVEL_HEADER,
- "cookie \'%s\' send by %s appears to be using time format 4.",
- *header, csp->http->url);
- }
- else if (NULL != (match = strptime(expiration_date, "%A %b %e %H:%M:%S %Y", &tm_cookie)))
- {
- /* Thu Mar 08 23:00:00 2007 GMT */
- log_error(LOG_LEVEL_HEADER,
- "cookie \'%s\' send by %s appears to be using time format 5.",
- *header, csp->http->url);
- }
+ char *expiration_date = cur_tag + 8; /* Skip "[Ee]xpires=" */
- /* Did any of them match? */
- if (NULL == match)
+ /* Did we detect the date properly? */
+ if (JB_ERR_OK != parse_header_time(expiration_date, &cookie_time))
{
/*
* Nope, treat it as if it was still valid.
* anyway, which in many cases will be shorter
* than a browser session.
*/
- cookie_time = timegm(&tm_cookie);
if (cookie_time - now < 0)
{
log_error(LOG_LEVEL_HEADER,
*
* Function : parse_header_time
*
- * Description : Transforms time inside a HTTP header into
- * the usual time format.
+ * Description : Parses time formats used in HTTP header strings
+ * to get the numerical representation.
*
* Parameters :
- * 1 : header = header to parse
- * 2 : tm = storage for the resulting time in seconds
+ * 1 : header_time = HTTP header time as string.
+ * 2 : result = storage for header_time in seconds
*
- * Returns : Time struct containing the header time, or
- * NULL in case of a parsing problems.
+ * Returns : JB_ERR_OK if the time format was recognized, or
+ * JB_ERR_PARSE otherwise.
*
*********************************************************************/
-struct tm *parse_header_time(char *header, time_t *tm) {
-
- char * timestring;
+jb_err parse_header_time(const char *header_time, time_t *result)
+{
struct tm gmt;
- struct tm * timeptr;
/*
- * Initializing gmt to prevent time zone offsets.
+ * Zero out gmt to prevent time zone offsets.
*
* While this is only necessary on some platforms
* (mingw32 for example), I don't know how to
* detect these automatically and doing it everywhere
* shouldn't hurt.
*/
- time(tm);
-#ifdef HAVE_LOCALTIME_R
- gmt = *localtime_r(tm, &gmt);
-#elif FEATURE_PTHREAD
- pthread_mutex_lock(&localtime_mutex);
- gmt = *localtime(tm);
- pthread_mutex_unlock(&localtime_mutex);
-#else
- gmt = *localtime(tm);
-#endif
-
- /* Skipping header name */
- timestring = strstr(header, ": ");
- if (strptime(timestring, ": %a, %d %b %Y %H:%M:%S", &gmt) == NULL)
- {
- timeptr = NULL;
- }
- else
+ memset(&gmt, 0, sizeof(gmt));
+
+ /*
+ * Try the known time formats in order. The && chain
+ * short-circuits, so parsing stops at the first format
+ * strptime() accepts. gmt is not cleared between attempts.
+ * The comment above each attempt shows an example of the
+ * format it is meant to match.
+ */
+ /* Tue, 02 Jun 2037 20:00:00 */
+ if ((NULL == strptime(header_time, "%a, %d %b %Y %H:%M:%S", &gmt))
+ /* Tue, 02-Jun-2037 20:00:00 */
+ && (NULL == strptime(header_time, "%a, %d-%b-%Y %H:%M:%S", &gmt))
+ /* Tue, 02-Jun-37 20:00:00 */
+ && (NULL == strptime(header_time, "%a, %d-%b-%y %H:%M:%S", &gmt))
+ /* Tuesday, 02-Jun-2037 20:00:00 */
+ && (NULL == strptime(header_time, "%A, %d-%b-%Y %H:%M:%S", &gmt))
+ /* Tuesday Jun 02 20:00:00 2037 */
+ && (NULL == strptime(header_time, "%A %b %d %H:%M:%S %Y", &gmt)))
{
- *tm = timegm(&gmt);
- timeptr = &gmt;
+ /* No format matched; *result has not been written. */
+ return JB_ERR_PARSE;
}
- return(timeptr);
+
+ /* Convert the parsed broken-down time to seconds; timegm() treats it as UTC. */
+ *result = timegm(&gmt);
+
+ return JB_ERR_OK;
}