-const char parsers_rcs[] = "$Id: parsers.c,v 1.94 2007/03/21 12:23:53 fabiankeil Exp $";
+const char parsers_rcs[] = "$Id: parsers.c,v 1.103 2007/06/01 16:31:54 fabiankeil Exp $";
/*********************************************************************
*
* File : $Source: /cvsroot/ijbswa/current/parsers.c,v $
*
* Revisions :
* $Log: parsers.c,v $
+ * Revision 1.103 2007/06/01 16:31:54 fabiankeil
+ * Change sed() to return a jb_err in preparation for forward-override{}.
+ *
+ * Revision 1.102 2007/05/27 12:39:32 fabiankeil
+ * Adjust "X-Filter: No" to disable dedicated header filters.
+ *
+ * Revision 1.101 2007/05/14 10:16:41 fabiankeil
+ * Streamline client_cookie_adder().
+ *
+ * Revision 1.100 2007/04/30 15:53:11 fabiankeil
+ * Make sure filters with dynamic jobs actually use them.
+ *
+ * Revision 1.99 2007/04/30 15:06:26 fabiankeil
+ * - Introduce dynamic pcrs jobs that can resolve variables.
+ * - Remove unnecessary update_action_bits_for_all_tags() call.
+ *
+ * Revision 1.98 2007/04/17 18:32:10 fabiankeil
+ * - Make tagging based on tags set by earlier taggers
+ * of the same kind possible.
+ * - Log whether or not new tags cause action bits updates
+ * (in which case a matching tag-pattern section exists).
+ * - Log if the user tries to set a tag that is already set.
+ *
+ * Revision 1.97 2007/04/15 16:39:21 fabiankeil
+ * Introduce tags as alternative way to specify which
+ * actions apply to a request. At the moment tags can be
+ * created based on client and server headers.
+ *
+ * Revision 1.96 2007/04/12 12:53:58 fabiankeil
+ * Log a warning if the content is compressed, filtering is
+ * enabled and Privoxy was compiled without zlib support.
+ * Closes FR#1673938.
+ *
+ * Revision 1.95 2007/03/25 14:26:40 fabiankeil
+ * - Fix warnings when compiled with glibc.
+ * - Don't use crumble() for cookie crunching.
+ * - Move cookie time parsing into parse_header_time().
+ * - Let parse_header_time() return a jb_err code
+ * instead of a pointer that can only be used to
+ * check for NULL anyway.
+ *
* Revision 1.94 2007/03/21 12:23:53 fabiankeil
* - Add better protection against malicious gzip headers.
* - Stop logging the first hundred bytes of decompressed content.
#include "jbsockets.h"
#include "miscutil.h"
#include "list.h"
+#include "actions.h"
+#include "filters.h"
#ifndef HAVE_STRPTIME
#include "strptime.h"
/* Fix a problem with Solaris. There should be no effect on other
* platforms.
- * Solaris's isspace() is a macro which uses it's argument directly
+ * Solaris's isspace() is a macro which uses its argument directly
* as an array index. Therefore we need to make sure that high-bit
* characters generate +ve values, and ideally we also want to make
* the argument match the declared parameter type of "int".
#define ijb_isupper(__X) isupper((int)(unsigned char)(__X))
#define ijb_tolower(__X) tolower((int)(unsigned char)(__X))
+jb_err header_tagger(struct client_state *csp, char *header);
+jb_err scan_headers(struct client_state *csp);
const struct parsers client_patterns[] = {
{ "referer:", 8, client_referrer },
}
+
+/*********************************************************************
+ *
+ * Function : scan_headers
+ *
+ * Description : Scans headers, applies tags and updates action bits.
+ *
+ * Logs the request URL and every header that is still
+ * present, and hands each of them to header_tagger().
+ * List entries whose string is NULL are skipped.
+ * Scanning stops as soon as header_tagger() reports
+ * anything other than JB_ERR_OK.
+ *
+ * Parameters :
+ * 1 : csp = Current client state (buffers, headers, etc...)
+ *
+ * Returns : The result of the last header_tagger() call, or
+ * JB_ERR_OK if there was no header to scan.
+ *
+ *********************************************************************/
+jb_err scan_headers(struct client_state *csp)
+{
+ struct list_entry *h; /* Header list entry that is currently scanned */
+ jb_err err = JB_ERR_OK;
+
+ log_error(LOG_LEVEL_HEADER, "scanning headers for: %s", csp->http->url);
+
+ for (h = csp->headers->first; (err == JB_ERR_OK) && (h != NULL) ; h = h->next)
+ {
+ /* Header crunch()ed in previous run? -> ignore.
+ * Such entries stay in the list, but their string is NULL. */
+ if (h->str == NULL) continue;
+ log_error(LOG_LEVEL_HEADER, "scan: %s", h->str);
+ err = header_tagger(csp, h->str); /* an error ends the loop via the loop condition */
+ }
+
+ return err; /* JB_ERR_OK unless header_tagger() failed */
+}
+
+
/*********************************************************************
*
* Function : sed
* headers (client or server)
* 3 : csp = Current client state (buffers, headers, etc...)
*
- * Returns : Single pointer to a fully formed header, or NULL
- * on out-of-memory error.
+ * Returns : JB_ERR_OK in case of success, or
+ * JB_ERR_MEMORY on out-of-memory error.
*
*********************************************************************/
-char *sed(const struct parsers pats[],
- const add_header_func_ptr more_headers[],
- struct client_state *csp)
+jb_err sed(const struct parsers pats[],
+ const add_header_func_ptr more_headers[],
+ struct client_state *csp)
{
struct list_entry *p;
const struct parsers *v;
if (first_run) /* Parse and print */
{
- log_error(LOG_LEVEL_HEADER, "scanning headers for: %s", csp->http->url);
+ scan_headers(csp);
+
for (v = pats; (err == JB_ERR_OK) && (v->str != NULL) ; v++)
{
for (p = csp->headers->first; (err == JB_ERR_OK) && (p != NULL) ; p = p->next)
/* Header crunch()ed in previous run? -> ignore */
if (p->str == NULL) continue;
- if (v == pats) log_error(LOG_LEVEL_HEADER, "scan: %s", p->str);
-
/* Does the current parser handle this header? */
if ((strncmpic(p->str, v->str, v->len) == 0) || (v->len == CHECK_EVERY_HEADER_REMAINING))
{
}
}
- if (err != JB_ERR_OK)
+ return err;
+}
+
+
+
+/*********************************************************************
+ *
+ * Function : header_tagger
+ *
+ * Description : Executes all text substitutions from applying
+ * tag actions and saves the result as tag.
+ *
+ * Client taggers are used until the flag
+ * CSP_FLAG_CLIENT_HEADER_PARSING_DONE is set, server
+ * taggers afterwards. A tagger only creates a tag if at
+ * least one of its jobs matched. Empty tags and tags
+ * that are already set are ignored. The action bits
+ * are updated right after a new tag has been added.
+ *
+ * XXX: Shares enough code with filter_header() and
+ * pcrs_filter_response() to warrant some helper functions.
+ *
+ * Parameters :
+ * 1 : csp = Current client state (buffers, headers, etc...)
+ * 2 : header = Header that is used as tagger input.
+ * It is never freed by this function.
+ *
+ * Returns : JB_ERR_OK in all cases. Regex failures and
+ * out-of-memory conditions are logged, but not
+ * reported to the caller.
+ *
+ *********************************************************************/
+jb_err header_tagger(struct client_state *csp, char *header)
+{
+ int wanted_filter_type;
+ int multi_action_index;
+ int i;
+ pcrs_job *job;
+
+ struct file_list *fl;
+ struct re_filterfile_spec *b;
+ struct list_entry *tag_name;
+
+ int found_filters = 0;
+ const size_t header_length = strlen(header);
+
+ /* Pick the tagger type that matches the current parsing stage. */
+ if (csp->flags & CSP_FLAG_CLIENT_HEADER_PARSING_DONE)
+ {
+ wanted_filter_type = FT_SERVER_HEADER_TAGGER;
+ multi_action_index = ACTION_MULTI_SERVER_HEADER_TAGGER;
+ }
+ else
+ {
+ wanted_filter_type = FT_CLIENT_HEADER_TAGGER;
+ multi_action_index = ACTION_MULTI_CLIENT_HEADER_TAGGER;
+ }
+
+ /* Check if there are any filters */
+ for (i = 0; i < MAX_AF_FILES; i++)
+ {
+ fl = csp->rlist[i];
+ if (NULL != fl)
+ {
+ if (NULL != fl->f)
+ {
+ found_filters = 1;
+ break;
+ }
+ }
+ }
+
+ if (0 == found_filters)
{
- return NULL;
+ log_error(LOG_LEVEL_ERROR, "Unable to get current state of regex tagging.");
+ return(JB_ERR_OK);
}
- return list_to_text(csp->headers);
-}
+ for (i = 0; i < MAX_AF_FILES; i++)
+ {
+ fl = csp->rlist[i];
+ if ((NULL == fl) || (NULL == fl->f))
+ {
+ /*
+ * Either there are no filter files
+ * left, or this filter file just
+ * contains no valid filters.
+ *
+ * Continue to be sure we don't miss
+ * valid filter files that are chained
+ * after empty or invalid ones.
+ */
+ continue;
+ }
+
+ /* For all filters, */
+ for (b = fl->f; b; b = b->next)
+ {
+ if (b->type != wanted_filter_type)
+ {
+ /* skip the ones we don't care about, */
+ continue;
+ }
+ /* leaving only taggers that could apply, of which we use the ones, */
+ for (tag_name = csp->action->multi[multi_action_index]->first;
+ NULL != tag_name; tag_name = tag_name->next)
+ {
+ /* that do apply, and */
+ if (strcmp(b->name, tag_name->str) == 0)
+ {
+ char *modified_tag = NULL;
+ /*
+ * tag starts out as the header itself. It only points
+ * to a buffer of its own once a job matched, which is
+ * why "tag != header" is used as "tagger matched" below.
+ */
+ char *tag = header;
+ size_t size = header_length;
+ pcrs_job *joblist = b->joblist;
+
+ if (b->dynamic) joblist = compile_dynamic_pcrs_job_list(csp, b);
+
+ if (NULL == joblist)
+ {
+ log_error(LOG_LEVEL_RE_FILTER,
+ "Tagger %s has empty joblist. Nothing to do.", b->name);
+ continue;
+ }
+
+ /* execute their pcrs_joblist on the header. */
+ for (job = joblist; NULL != job; job = job->next)
+ {
+ const int hits = pcrs_execute(job, tag, size, &modified_tag, &size);
+
+ if (0 < hits)
+ {
+ /* Success, continue with the modified version. */
+ if (tag != header)
+ {
+ /* Release the intermediate result of an earlier job. */
+ freez(tag);
+ }
+ tag = modified_tag;
+ }
+ else
+ {
+ /* Tagger doesn't match */
+ if (0 > hits)
+ {
+ /*
+ * Regex failure, log it but continue anyway.
+ *
+ * header already is the string. Dereferencing it
+ * would pass a single char where %s expects a
+ * pointer to a string.
+ */
+ log_error(LOG_LEVEL_ERROR,
+ "Problems with tagger \'%s\' and header \'%s\': %s",
+ b->name, header, pcrs_strerror(hits));
+ }
+ /* Drop whatever pcrs_execute() left in modified_tag. */
+ freez(modified_tag);
+ }
+ }
+ /* Only dynamically compiled job lists are owned by this function. */
+ if (b->dynamic) pcrs_free_joblist(joblist);
+
+ /* If this tagger matched */
+ if (tag != header)
+ {
+ if (0 == size)
+ {
+ /*
+ * There is no technical limitation which makes
+ * it impossible to use empty tags, but I assume
+ * no one would do it intentionally.
+ */
+ freez(tag);
+ log_error(LOG_LEVEL_INFO,
+ "Tagger \'%s\' created an empty tag. Ignored.",
+ b->name);
+ continue;
+ }
+
+ if (!list_contains_item(csp->tags, tag))
+ {
+ if (JB_ERR_OK != enlist(csp->tags, tag))
+ {
+ /* header is passed as string here, see above. */
+ log_error(LOG_LEVEL_ERROR,
+ "Insufficient memory to add tag \'%s\', "
+ "based on tagger \'%s\' and header \'%s\'",
+ tag, b->name, header);
+ }
+ else
+ {
+ char *action_message;
+ /*
+ * update the action bits right away, to make
+ * tagging based on tags set by earlier taggers
+ * of the same kind possible.
+ */
+ if (update_action_bits_for_tag(csp, tag))
+ {
+ action_message = "Action bits updated accordingly.";
+ }
+ else
+ {
+ action_message = "No action bits update necessary.";
+ }
+
+ log_error(LOG_LEVEL_HEADER,
+ "Tagger \'%s\' added tag \'%s\'. %s",
+ b->name, tag, action_message);
+ }
+ }
+ else
+ {
+ /* XXX: Is this log-worthy? */
+ log_error(LOG_LEVEL_HEADER,
+ "Tagger \'%s\' didn't add tag \'%s\'. "
+ "Tag already present", b->name, tag);
+ }
+ /* The local tag buffer is released no matter which branch was taken. */
+ freez(tag);
+ } /* if the tagger matched */
+ } /* if the tagger applies */
+ } /* for every tagger that could apply */
+ } /* for all filters */
+ } /* for all filter files */
+
+ return JB_ERR_OK;
+}
/* here begins the family of parser functions that reformat header lines */
int wanted_filter_type;
int multi_action_index;
+ if (csp->flags & CSP_FLAG_NO_FILTERING)
+ {
+ return JB_ERR_OK;
+ }
+
if (csp->flags & CSP_FLAG_CLIENT_HEADER_PARSING_DONE)
{
wanted_filter_type = FT_SERVER_HEADER_FILTER;
if (strcmp(b->name, filtername->str) == 0)
{
int current_hits = 0;
+ pcrs_job *joblist = b->joblist;
+
+ if (b->dynamic) joblist = compile_dynamic_pcrs_job_list(csp, b);
- if ( NULL == b->joblist )
+ if (NULL == joblist)
{
log_error(LOG_LEVEL_RE_FILTER, "Filter %s has empty joblist. Nothing to do.", b->name);
continue;
*header, size, b->name);
/* Apply all jobs from the joblist */
- for (job = b->joblist; NULL != job; job = job->next)
+ for (job = joblist; NULL != job; job = job->next)
{
matches = pcrs_execute(job, *header, size, &newheader, &size);
if ( 0 < matches )
}
}
}
+
+ if (b->dynamic) pcrs_free_joblist(joblist);
+
log_error(LOG_LEVEL_RE_FILTER, "... produced %d hits (new size %d).", current_hits, size);
hits += current_hits;
}
jb_err server_content_type(struct client_state *csp, char **header)
{
/* Remove header if it isn't the first Content-Type header */
- if(csp->content_type && (csp->content_type != CT_TABOO))
+ if ((csp->content_type & CT_DECLARED))
{
/*
* Another, slightly slower, way to see if
return JB_ERR_OK;
}
+ /*
+ * Signal that the Content-Type has been set.
+ */
+ csp->content_type |= CT_DECLARED;
+
if (!(csp->content_type & CT_TABOO))
{
if ((strstr(*header, " text/") && !strstr(*header, "plain"))
{
csp->content_type |= CT_JPEG;
}
- else
- {
- csp->content_type = 0;
- }
- }
- /*
- * Are we enabling text mode by force?
- */
- if (csp->action->flags & ACTION_FORCE_TEXT_MODE)
- {
- /*
- * Do we really have to?
- */
- if (csp->content_type & CT_TEXT)
- {
- log_error(LOG_LEVEL_HEADER, "Text mode is already enabled.");
- }
- else
- {
- csp->content_type |= CT_TEXT;
- log_error(LOG_LEVEL_HEADER, "Text mode enabled by force. Take cover!");
- }
}
+
/*
* Are we messing with the content type?
*/
"Enable force-text-mode if you know what you're doing.", *header);
}
}
+
return JB_ERR_OK;
}
* Body is compressed, turn off pcrs and gif filtering.
*/
csp->content_type |= CT_TABOO;
+
+ /*
+ * Log a warning if the user expects the content to be filtered.
+ */
+ if ((csp->rlist != NULL) &&
+ (!list_is_empty(csp->action->multi[ACTION_MULTI_FILTER])))
+ {
+ log_error(LOG_LEVEL_INFO,
+ "Compressed content detected, content filtering disabled. "
+ "Consider recompiling Privoxy with zlib support or "
+ "enable the prevent-compression action.");
+ }
}
-#endif /* !defined(FEATURE_ZLIB) */
+#endif /* defined(FEATURE_ZLIB) */
return JB_ERR_OK;
}
else
{
- csp->content_type = CT_TABOO;
- csp->action->flags &= ~ACTION_FILTER_SERVER_HEADERS;
- csp->action->flags &= ~ACTION_FILTER_CLIENT_HEADERS;
+ csp->content_type = CT_TABOO; /* XXX: This hack shouldn't be necessary */
+ csp->flags |= CSP_FLAG_NO_FILTERING;
log_error(LOG_LEVEL_HEADER, "Accepted the client's request to fetch without filtering.");
}
log_error(LOG_LEVEL_HEADER, "Crunching %s", *header);
*
* Function : client_cookie_adder
*
- * Description : Used in the add_client_headers list. Called from `sed'.
- *
- * XXX: Remove csp->cookie_list which is no longer used.
+ * Description : Used in the add_client_headers list to add "wafers".
+ * Called from `sed'.
*
* Parameters :
* 1 : csp = Current client state (buffers, headers, etc...)
*********************************************************************/
jb_err client_cookie_adder(struct client_state *csp)
{
- struct list_entry *lst;
char *tmp;
- struct list_entry *list1 = csp->cookie_list->first;
- struct list_entry *list2 = csp->action->multi[ACTION_MULTI_WAFER]->first;
- int first_cookie = 1;
+ struct list_entry *wafer;
+ struct list_entry *wafer_list = csp->action->multi[ACTION_MULTI_WAFER]->first;
jb_err err;
- if ((list1 == NULL) && (list2 == NULL))
+ if (NULL == wafer_list)
{
/* Nothing to do */
return JB_ERR_OK;
tmp = strdup("Cookie: ");
- for (lst = list1; lst ; lst = lst->next)
+ for (wafer = wafer_list; (NULL != tmp) && (NULL != wafer); wafer = wafer->next)
{
- if (first_cookie)
- {
- first_cookie = 0;
- }
- else
+ if (wafer != wafer_list)
{
+ /* As this isn't the first wafer, we need a delimiter. */
string_append(&tmp, "; ");
}
- string_append(&tmp, lst->str);
- }
-
- for (lst = list2; lst ; lst = lst->next)
- {
- if (first_cookie)
- {
- first_cookie = 0;
- }
- else
- {
- string_append(&tmp, "; ");
- }
- string_join(&tmp, cookie_encode(lst->str));
+ string_join(&tmp, cookie_encode(wafer->str));
}
if (tmp == NULL)
*********************************************************************/
jb_err server_http(struct client_state *csp, char **header)
{
- /* Signal that were now parsing server headers. */
- csp->flags |= CSP_FLAG_CLIENT_HEADER_PARSING_DONE;
-
sscanf(*header, "HTTP/%*d.%*d %d", &(csp->http->status));
if (csp->http->status == 206)
{