X-Git-Url: http://www.privoxy.org/gitweb/?a=blobdiff_plain;f=parsers.c;h=df7538a9535486e8139c490e4c25f4d8a9cb5f6a;hb=beebc6650fffc2169d51d901f229aede4670eb58;hp=2723a9071c689160aae855f56bbfbd20a63a55b1;hpb=24283751261d4d07ef121a1b2d3803ad54249a5c;p=privoxy.git diff --git a/parsers.c b/parsers.c index 2723a907..df7538a9 100644 --- a/parsers.c +++ b/parsers.c @@ -1,4 +1,4 @@ -const char parsers_rcs[] = "$Id: parsers.c,v 1.94 2007/03/21 12:23:53 fabiankeil Exp $"; +const char parsers_rcs[] = "$Id: parsers.c,v 1.103 2007/06/01 16:31:54 fabiankeil Exp $"; /********************************************************************* * * File : $Source: /cvsroot/ijbswa/current/parsers.c,v $ @@ -44,6 +44,47 @@ const char parsers_rcs[] = "$Id: parsers.c,v 1.94 2007/03/21 12:23:53 fabiankeil * * Revisions : * $Log: parsers.c,v $ + * Revision 1.103 2007/06/01 16:31:54 fabiankeil + * Change sed() to return a jb_err in preparation for forward-override{}. + * + * Revision 1.102 2007/05/27 12:39:32 fabiankeil + * Adjust "X-Filter: No" to disable dedicated header filters. + * + * Revision 1.101 2007/05/14 10:16:41 fabiankeil + * Streamline client_cookie_adder(). + * + * Revision 1.100 2007/04/30 15:53:11 fabiankeil + * Make sure filters with dynamic jobs actually use them. + * + * Revision 1.99 2007/04/30 15:06:26 fabiankeil + * - Introduce dynamic pcrs jobs that can resolve variables. + * - Remove unnecessary update_action_bits_for_all_tags() call. + * + * Revision 1.98 2007/04/17 18:32:10 fabiankeil + * - Make tagging based on tags set by earlier taggers + * of the same kind possible. + * - Log whether or not new tags cause action bits updates + * (in which case a matching tag-pattern section exists). + * - Log if the user tries to set a tag that is already set. + * + * Revision 1.97 2007/04/15 16:39:21 fabiankeil + * Introduce tags as alternative way to specify which + * actions apply to a request. At the moment tags can be + * created based on client and server headers. 
+ * + * Revision 1.96 2007/04/12 12:53:58 fabiankeil + * Log a warning if the content is compressed, filtering is + * enabled and Privoxy was compiled without zlib support. + * Closes FR#1673938. + * + * Revision 1.95 2007/03/25 14:26:40 fabiankeil + * - Fix warnings when compiled with glibc. + * - Don't use crumble() for cookie crunching. + * - Move cookie time parsing into parse_header_time(). + * - Let parse_header_time() return a jb_err code + * instead of a pointer that can only be used to + * check for NULL anyway. + * * Revision 1.94 2007/03/21 12:23:53 fabiankeil * - Add better protection against malicious gzip headers. * - Stop logging the first hundred bytes of decompressed content. @@ -680,6 +721,8 @@ const char parsers_rcs[] = "$Id: parsers.c,v 1.94 2007/03/21 12:23:53 fabiankeil #include "jbsockets.h" #include "miscutil.h" #include "list.h" +#include "actions.h" +#include "filters.h" #ifndef HAVE_STRPTIME #include "strptime.h" @@ -689,7 +732,7 @@ const char parsers_h_rcs[] = PARSERS_H_VERSION; /* Fix a problem with Solaris. There should be no effect on other * platforms. - * Solaris's isspace() is a macro which uses it's argument directly + * Solaris's isspace() is a macro which uses its argument directly * as an array index. Therefore we need to make sure that high-bit * characters generate +ve values, and ideally we also want to make * the argument match the declared parameter type of "int". 
@@ -700,6 +743,8 @@ const char parsers_h_rcs[] = PARSERS_H_VERSION; #define ijb_isupper(__X) isupper((int)(unsigned char)(__X)) #define ijb_tolower(__X) tolower((int)(unsigned char)(__X)) +jb_err header_tagger(struct client_state *csp, char *header); +jb_err scan_headers(struct client_state *csp); const struct parsers client_patterns[] = { { "referer:", 8, client_referrer }, @@ -1357,6 +1402,38 @@ char *get_header_value(const struct list *header_list, const char *header_name) } + +/********************************************************************* + * + * Function : scan_headers + * + * Description : Scans headers, applies tags and updates action bits. + * + * Parameters : + * 1 : csp = Current client state (buffers, headers, etc...) + * + * Returns : JB_ERR_OK + * + *********************************************************************/ +jb_err scan_headers(struct client_state *csp) +{ + struct list_entry *h; /* Header */ + jb_err err = JB_ERR_OK; + + log_error(LOG_LEVEL_HEADER, "scanning headers for: %s", csp->http->url); + + for (h = csp->headers->first; (err == JB_ERR_OK) && (h != NULL) ; h = h->next) + { + /* Header crunch()ed in previous run? -> ignore */ + if (h->str == NULL) continue; + log_error(LOG_LEVEL_HEADER, "scan: %s", h->str); + err = header_tagger(csp, h->str); + } + + return err; +} + + /********************************************************************* * * Function : sed @@ -1375,13 +1452,13 @@ char *get_header_value(const struct list *header_list, const char *header_name) * headers (client or server) * 3 : csp = Current client state (buffers, headers, etc...) * - * Returns : Single pointer to a fully formed header, or NULL - * on out-of-memory error. + * Returns : JB_ERR_OK in case of success, or + * JB_ERR_MEMORY on out-of-memory error.
* *********************************************************************/ -char *sed(const struct parsers pats[], - const add_header_func_ptr more_headers[], - struct client_state *csp) +jb_err sed(const struct parsers pats[], + const add_header_func_ptr more_headers[], + struct client_state *csp) { struct list_entry *p; const struct parsers *v; @@ -1397,7 +1474,8 @@ char *sed(const struct parsers pats[], if (first_run) /* Parse and print */ { - log_error(LOG_LEVEL_HEADER, "scanning headers for: %s", csp->http->url); + scan_headers(csp); + for (v = pats; (err == JB_ERR_OK) && (v->str != NULL) ; v++) { for (p = csp->headers->first; (err == JB_ERR_OK) && (p != NULL) ; p = p->next) @@ -1405,8 +1483,6 @@ char *sed(const struct parsers pats[], /* Header crunch()ed in previous run? -> ignore */ if (p->str == NULL) continue; - if (v == pats) log_error(LOG_LEVEL_HEADER, "scan: %s", p->str); - /* Does the current parser handle this header? */ if ((strncmpic(p->str, v->str, v->len) == 0) || (v->len == CHECK_EVERY_HEADER_REMAINING)) { @@ -1446,14 +1522,213 @@ char *sed(const struct parsers pats[], } } - if (err != JB_ERR_OK) + return err; +} + + + +/********************************************************************* + * + * Function : header_tagger + * + * Description : Executes all text substitutions from applying + * tag actions and saves the result as tag. + * + * XXX: Shares enough code with filter_header() and + * pcrs_filter_response() to warrant some helper functions. + * + * Parameters : + * 1 : csp = Current client state (buffers, headers, etc...) 
+ * 2 : header = Header that is used as tagger input + * + * Returns : JB_ERR_OK on success and always succeeds + * + *********************************************************************/ +jb_err header_tagger(struct client_state *csp, char *header) +{ + int wanted_filter_type; + int multi_action_index; + int i; + pcrs_job *job; + + struct file_list *fl; + struct re_filterfile_spec *b; + struct list_entry *tag_name; + + int found_filters = 0; + const size_t header_length = strlen(header); + + if (csp->flags & CSP_FLAG_CLIENT_HEADER_PARSING_DONE) + { + wanted_filter_type = FT_SERVER_HEADER_TAGGER; + multi_action_index = ACTION_MULTI_SERVER_HEADER_TAGGER; + } + else + { + wanted_filter_type = FT_CLIENT_HEADER_TAGGER; + multi_action_index = ACTION_MULTI_CLIENT_HEADER_TAGGER; + } + + /* Check if there are any filters */ + for (i = 0; i < MAX_AF_FILES; i++) + { + fl = csp->rlist[i]; + if (NULL != fl) + { + if (NULL != fl->f) + { + found_filters = 1; + break; + } + } + } + + if (0 == found_filters) { - return NULL; + log_error(LOG_LEVEL_ERROR, "Unable to get current state of regex tagging."); + return(JB_ERR_OK); } - return list_to_text(csp->headers); -} + for (i = 0; i < MAX_AF_FILES; i++) + { + fl = csp->rlist[i]; + if ((NULL == fl) || (NULL == fl->f)) + { + /* + * Either there are no filter files + * left, or this filter file just + * contains no valid filters. + * + * Continue to be sure we don't miss + * valid filter files that are chained + * after empty or invalid ones.
+ */ + continue; + } + + /* For all filters, */ + for (b = fl->f; b; b = b->next) + { + if (b->type != wanted_filter_type) + { + /* skip the ones we don't care about, */ + continue; + } + /* leaving only taggers that could apply, of which we use the ones, */ + for (tag_name = csp->action->multi[multi_action_index]->first; + NULL != tag_name; tag_name = tag_name->next) + { + /* that do apply, and */ + if (strcmp(b->name, tag_name->str) == 0) + { + char *modified_tag = NULL; + char *tag = header; + size_t size = header_length; + pcrs_job *joblist = b->joblist; + + if (b->dynamic) joblist = compile_dynamic_pcrs_job_list(csp, b); + + if (NULL == joblist) + { + log_error(LOG_LEVEL_RE_FILTER, + "Tagger %s has empty joblist. Nothing to do.", b->name); + continue; + } + + /* execute their pcrs_joblist on the header. */ + for (job = joblist; NULL != job; job = job->next) + { + const int hits = pcrs_execute(job, tag, size, &modified_tag, &size); + + if (0 < hits) + { + /* Success, continue with the modified version. */ + if (tag != header) + { + freez(tag); + } + tag = modified_tag; + } + else + { + /* Tagger doesn't match */ + if (0 > hits) + { + /* Regex failure, log it but continue anyway. */ + log_error(LOG_LEVEL_ERROR, + "Problems with tagger \'%s\' and header \'%s\': %s", + b->name, header, pcrs_strerror(hits)); + } + freez(modified_tag); + } + } + if (b->dynamic) pcrs_free_joblist(joblist); + + /* If this tagger matched */ + if (tag != header) + { + if (0 == size) + { + /* + * There is no technical limitation which makes + * it impossible to use empty tags, but I assume + * no one would do it intentionally. + */ + freez(tag); + log_error(LOG_LEVEL_INFO, + "Tagger \'%s\' created an empty tag.
Ignored.", + b->name); + continue; + } + + if (!list_contains_item(csp->tags, tag)) + { + if (JB_ERR_OK != enlist(csp->tags, tag)) + { + log_error(LOG_LEVEL_ERROR, + "Insufficient memory to add tag \'%s\', " + "based on tagger \'%s\' and header \'%s\'", + tag, b->name, header); + } + else + { + char *action_message; + /* + * update the action bits right away, to make + * tagging based on tags set by earlier taggers + * of the same kind possible. + */ + if (update_action_bits_for_tag(csp, tag)) + { + action_message = "Action bits updated accordingly."; + } + else + { + action_message = "No action bits update necessary."; + } + + log_error(LOG_LEVEL_HEADER, + "Tagger \'%s\' added tag \'%s\'. %s", + b->name, tag, action_message); + } + } + else + { + /* XXX: Is this log-worthy? */ + log_error(LOG_LEVEL_HEADER, + "Tagger \'%s\' didn't add tag \'%s\'. " + "Tag already present", b->name, tag); + } + freez(tag); + } /* if the tagger matched */ + } /* if the tagger applies */ + } /* for every tagger that could apply */ + } /* for all filters */ + } /* for all filter files */ + + return JB_ERR_OK; +} /* here begins the family of parser functions that reformat header lines */ @@ -1493,6 +1768,11 @@ jb_err filter_header(struct client_state *csp, char **header) int wanted_filter_type; int multi_action_index; + if (csp->flags & CSP_FLAG_NO_FILTERING) + { + return JB_ERR_OK; + } + if (csp->flags & CSP_FLAG_CLIENT_HEADER_PARSING_DONE) { wanted_filter_type = FT_SERVER_HEADER_FILTER; @@ -1561,8 +1841,11 @@ jb_err filter_header(struct client_state *csp, char **header) if (strcmp(b->name, filtername->str) == 0) { int current_hits = 0; + pcrs_job *joblist = b->joblist; + + if (b->dynamic) joblist = compile_dynamic_pcrs_job_list(csp, b); - if ( NULL == b->joblist ) + if (NULL == joblist) { log_error(LOG_LEVEL_RE_FILTER, "Filter %s has empty joblist.
Nothing to do.", b->name); continue; @@ -1572,7 +1855,7 @@ jb_err filter_header(struct client_state *csp, char **header) *header, size, b->name); /* Apply all jobs from the joblist */ - for (job = b->joblist; NULL != job; job = job->next) + for (job = joblist; NULL != job; job = job->next) { matches = pcrs_execute(job, *header, size, &newheader, &size); if ( 0 < matches ) @@ -1599,6 +1882,9 @@ jb_err filter_header(struct client_state *csp, char **header) } } } + + if (b->dynamic) pcrs_free_joblist(joblist); + log_error(LOG_LEVEL_RE_FILTER, "... produced %d hits (new size %d).", current_hits, size); hits += current_hits; } @@ -1758,7 +2044,7 @@ jb_err crunch_server_header(struct client_state *csp, char **header) jb_err server_content_type(struct client_state *csp, char **header) { /* Remove header if it isn't the first Content-Type header */ - if(csp->content_type && (csp->content_type != CT_TABOO)) + if ((csp->content_type & CT_DECLARED)) { /* * Another, slightly slower, way to see if @@ -1774,6 +2060,11 @@ jb_err server_content_type(struct client_state *csp, char **header) return JB_ERR_OK; } + /* + * Signal that the Content-Type has been set. + */ + csp->content_type |= CT_DECLARED; + if (!(csp->content_type & CT_TABOO)) { if ((strstr(*header, " text/") && !strstr(*header, "plain")) @@ -1790,29 +2081,8 @@ jb_err server_content_type(struct client_state *csp, char **header) { csp->content_type |= CT_JPEG; } - else - { - csp->content_type = 0; - } - } - /* - * Are we enabling text mode by force? - */ - if (csp->action->flags & ACTION_FORCE_TEXT_MODE) - { - /* - * Do we really have to? - */ - if (csp->content_type & CT_TEXT) - { - log_error(LOG_LEVEL_HEADER, "Text mode is already enabled."); - } - else - { - csp->content_type |= CT_TEXT; - log_error(LOG_LEVEL_HEADER, "Text mode enabled by force. Take cover!"); - } } + /* * Are we messing with the content type? 
*/ @@ -1841,6 +2111,7 @@ jb_err server_content_type(struct client_state *csp, char **header) "Enable force-text-mode if you know what you're doing.", *header); } } + return JB_ERR_OK; } @@ -1975,8 +2246,20 @@ jb_err server_content_encoding(struct client_state *csp, char **header) * Body is compressed, turn off pcrs and gif filtering. */ csp->content_type |= CT_TABOO; + + /* + * Log a warning if the user expects the content to be filtered. + */ + if ((csp->rlist != NULL) && + (!list_is_empty(csp->action->multi[ACTION_MULTI_FILTER]))) + { + log_error(LOG_LEVEL_INFO, + "Compressed content detected, content filtering disabled. " + "Consider recompiling Privoxy with zlib support or " + "enable the prevent-compression action."); + } } -#endif /* !defined(FEATURE_ZLIB) */ +#endif /* defined(FEATURE_ZLIB) */ return JB_ERR_OK; @@ -3081,9 +3364,8 @@ jb_err client_x_filter(struct client_state *csp, char **header) } else { - csp->content_type = CT_TABOO; - csp->action->flags &= ~ACTION_FILTER_SERVER_HEADERS; - csp->action->flags &= ~ACTION_FILTER_CLIENT_HEADERS; + csp->content_type = CT_TABOO; /* XXX: This hack shouldn't be necessary */ + csp->flags |= CSP_FLAG_NO_FILTERING; log_error(LOG_LEVEL_HEADER, "Accepted the client's request to fetch without filtering."); } log_error(LOG_LEVEL_HEADER, "Crunching %s", *header); @@ -3151,9 +3433,8 @@ jb_err client_host_adder(struct client_state *csp) * * Function : client_cookie_adder * - * Description : Used in the add_client_headers list. Called from `sed'. - * - * XXX: Remove csp->cookie_list which is no longer used. + * Description : Used in the add_client_headers list to add "wafers". + * Called from `sed'. * * Parameters : * 1 : csp = Current client state (buffers, headers, etc...) 
@@ -3164,14 +3445,12 @@ jb_err client_host_adder(struct client_state *csp) *********************************************************************/ jb_err client_cookie_adder(struct client_state *csp) { - struct list_entry *lst; char *tmp; - struct list_entry *list1 = csp->cookie_list->first; - struct list_entry *list2 = csp->action->multi[ACTION_MULTI_WAFER]->first; - int first_cookie = 1; + struct list_entry *wafer; + struct list_entry *wafer_list = csp->action->multi[ACTION_MULTI_WAFER]->first; jb_err err; - if ((list1 == NULL) && (list2 == NULL)) + if (NULL == wafer_list) { /* Nothing to do */ return JB_ERR_OK; @@ -3179,30 +3458,14 @@ jb_err client_cookie_adder(struct client_state *csp) tmp = strdup("Cookie: "); - for (lst = list1; lst ; lst = lst->next) + for (wafer = wafer_list; (NULL != tmp) && (NULL != wafer); wafer = wafer->next) { - if (first_cookie) - { - first_cookie = 0; - } - else + if (wafer != wafer_list) { + /* As this isn't the first wafer, we need a delimiter. */ string_append(&tmp, "; "); } - string_append(&tmp, lst->str); - } - - for (lst = list2; lst ; lst = lst->next) - { - if (first_cookie) - { - first_cookie = 0; - } - else - { - string_append(&tmp, "; "); - } - string_join(&tmp, cookie_encode(lst->str)); + string_join(&tmp, cookie_encode(wafer->str)); } if (tmp == NULL) @@ -3395,9 +3658,6 @@ jb_err connection_close_adder(struct client_state *csp) *********************************************************************/ jb_err server_http(struct client_state *csp, char **header) { - /* Signal that were now parsing server headers. */ - csp->flags |= CSP_FLAG_CLIENT_HEADER_PARSING_DONE; - sscanf(*header, "HTTP/%*d.%*d %d", &(csp->http->status)); if (csp->http->status == 206) {