From: Fabian Keil Date: Sun, 15 Apr 2007 16:39:21 +0000 (+0000) Subject: Introduce tags as alternative way to specify which X-Git-Tag: v_3_0_7~272 X-Git-Url: http://www.privoxy.org/gitweb/?p=privoxy.git;a=commitdiff_plain;h=22de1764b6460768a49d33328fe00e7320e7e618 Introduce tags as alternative way to specify which actions apply to a request. At the moment tags can be created based on client and server headers. --- diff --git a/actionlist.h b/actionlist.h index 3d36d66b..c4d19b6d 100644 --- a/actionlist.h +++ b/actionlist.h @@ -39,6 +39,11 @@ * * Revisions : * $Log: actionlist.h,v $ + * Revision 1.24 2007/03/20 15:16:34 fabiankeil + * Use dedicated header filter actions instead of abusing "filter". + * Replace "filter-client-headers" and "filter-client-headers" + * with "server-header-filter" and "client-header-filter". + * * Revision 1.23 2006/10/09 10:26:18 fabiankeil * Changed the path in set-image-blocker's redirection default to * "send-banner?type=pattern" instead of "show-banner?type=pattern" @@ -149,6 +154,7 @@ DEFINE_ACTION_MULTI ("add-header", ACTION_MULTI_ADD_HEADER) DEFINE_ACTION_BOOL ("block", ACTION_BLOCK) DEFINE_ACTION_MULTI ("client-header-filter", ACTION_MULTI_CLIENT_HEADER_FILTER) +DEFINE_ACTION_MULTI ("client-header-tagger", ACTION_MULTI_CLIENT_HEADER_TAGGER) DEFINE_ACTION_STRING ("content-type-overwrite", ACTION_CONTENT_TYPE_OVERWRITE, ACTION_STRING_CONTENT_TYPE) DEFINE_CGI_PARAM_NO_RADIO("content-type-overwrite", ACTION_CONTENT_TYPE_OVERWRITE, ACTION_STRING_CONTENT_TYPE, "text/html") DEFINE_ACTION_STRING ("crunch-client-header", ACTION_CRUNCH_CLIENT_HEADER, ACTION_STRING_CLIENT_HEADER) @@ -203,6 +209,7 @@ DEFINE_CGI_PARAM_NO_RADIO("redirect", ACTION_REDIRECT, DEFINE_ACTION_BOOL ("send-vanilla-wafer", ACTION_VANILLA_WAFER) DEFINE_ACTION_MULTI ("send-wafer", ACTION_MULTI_WAFER) DEFINE_ACTION_MULTI ("server-header-filter", ACTION_MULTI_SERVER_HEADER_FILTER) +DEFINE_ACTION_MULTI ("server-header-tagger", ACTION_MULTI_SERVER_HEADER_TAGGER) 
DEFINE_ACTION_BOOL ("session-cookies-only", ACTION_NO_COOKIE_KEEP) DEFINE_ACTION_STRING ("set-image-blocker", ACTION_IMAGE_BLOCKER, ACTION_STRING_IMAGE_BLOCKER) DEFINE_CGI_PARAM_RADIO ("set-image-blocker", ACTION_IMAGE_BLOCKER, ACTION_STRING_IMAGE_BLOCKER, "pattern", 1) diff --git a/actions.c b/actions.c index 53872512..56706e4f 100644 --- a/actions.c +++ b/actions.c @@ -1,4 +1,4 @@ -const char actions_rcs[] = "$Id: actions.c,v 1.36 2006/12/28 17:15:42 fabiankeil Exp $"; +const char actions_rcs[] = "$Id: actions.c,v 1.37 2007/03/11 15:56:12 fabiankeil Exp $"; /********************************************************************* * * File : $Source: /cvsroot/ijbswa/current/actions.c,v $ @@ -33,6 +33,9 @@ const char actions_rcs[] = "$Id: actions.c,v 1.36 2006/12/28 17:15:42 fabiankeil * * Revisions : * $Log: actions.c,v $ + * Revision 1.37 2007/03/11 15:56:12 fabiankeil + * Add kludge to log unknown aliases and actions before exiting. + * * Revision 1.36 2006/12/28 17:15:42 fabiankeil * Fix gcc43 conversion warning. * @@ -868,6 +871,67 @@ jb_err merge_current_action (struct current_action_spec *dest, } +/********************************************************************* + * + * Function : update_action_bits + * + * Description : Updates the action bits based on matching tags. + * + * Parameters : + * 1 : csp = Current client state (buffers, headers, etc...) 
+ * + * Returns : 0 if no tag matched, or + * 1 otherwise + * + *********************************************************************/ +int update_action_bits(struct client_state *csp) +{ + struct file_list *fl; + struct url_actions *b; + struct list_entry *tag; + int updated = 0; + int i; + + /* Take each tag, */ + for (tag = csp->tags->first; tag != NULL; tag = tag->next) + { + /* run through all action files, */ + for (i = 0; i < MAX_AF_FILES; i++) + { + if (((fl = csp->actions_list[i]) == NULL) || ((b = fl->f) == NULL)) + { + /* Skip empty files */ + continue; + } + /* and through all the action patterns, */ + for (b = b->next; NULL != b; b = b->next) + { + /* skip the URL patterns, */ + if (NULL == b->url->tag_regex) + { + continue; + } + + /* and check if one of the tag patterns matches this tag, */ + if (0 == regexec(b->url->tag_regex, tag->str, 0, NULL, 0)) + { + /* if it does, update the action bit map, */ + if (merge_current_action(csp->action, b->action)) + { + log_error(LOG_LEVEL_ERROR, + "Out of memorey while changing action bits"); + } + /* and signal the change. */ + updated = 1; + } + } + } + } + + return updated; +} + + /********************************************************************* * * Function : free_current_action @@ -1479,7 +1543,7 @@ static int load_one_actions_file(struct client_state *csp, int fileid) * Function : actions_to_text * * Description : Converts a actionsfile entry from the internal - * structurt into a text line. The output is split + * structure into a text line. The output is split * into one line for each action with line continuation. 
* * Parameters : diff --git a/actions.h b/actions.h index 3f5c62db..0e97276f 100644 --- a/actions.h +++ b/actions.h @@ -1,14 +1,14 @@ #ifndef ACTIONS_H_INCLUDED #define ACTIONS_H_INCLUDED -#define ACTIONS_H_VERSION "$Id: actions.h,v 1.12 2002/05/06 07:56:50 oes Exp $" +#define ACTIONS_H_VERSION "$Id: actions.h,v 1.14 2006/07/18 14:48:45 david__schmidt Exp $" /********************************************************************* * - * File : $Source: /cvsroot/ijbswa/current/Attic/actions.h,v $ + * File : $Source: /cvsroot/ijbswa/current/actions.h,v $ * * Purpose : Declares functions to work with actions files * Functions declared include: FIXME * - * Copyright : Written by and Copyright (C) 2001 the SourceForge + * Copyright : Written by and Copyright (C) 2001-2007 the SourceForge * Privoxy team. http://www.privoxy.org/ * * Based on the Internet Junkbuster originally written @@ -35,6 +35,10 @@ * * Revisions : * $Log: actions.h,v $ + * Revision 1.14 2006/07/18 14:48:45 david__schmidt + * Reorganizing the repository: swapping out what was HEAD (the old 3.1 branch) + * with what was really the latest development (the v_3_0_branch branch) + * * Revision 1.12 2002/05/06 07:56:50 oes * Made actions_to_html independent of FEATURE_CGI_EDIT_ACTIONS * @@ -122,6 +126,7 @@ extern void init_action(struct action_spec *dest); extern void free_action(struct action_spec *src); extern jb_err merge_actions (struct action_spec *dest, const struct action_spec *src); +extern int update_action_bits(struct client_state *csp); extern jb_err copy_action (struct action_spec *dest, const struct action_spec *src); extern char * actions_to_text (struct action_spec *action); diff --git a/cgiedit.c b/cgiedit.c index 4caec74b..ea572566 100644 --- a/cgiedit.c +++ b/cgiedit.c @@ -1,4 +1,4 @@ -const char cgiedit_rcs[] = "$Id: cgiedit.c,v 1.51 2007/04/08 13:21:05 fabiankeil Exp $"; +const char cgiedit_rcs[] = "$Id: cgiedit.c,v 1.52 2007/04/12 10:41:23 fabiankeil Exp $"; 
/********************************************************************* * * File : $Source: /cvsroot/ijbswa/current/cgiedit.c,v $ @@ -42,6 +42,11 @@ const char cgiedit_rcs[] = "$Id: cgiedit.c,v 1.51 2007/04/08 13:21:05 fabiankeil * * Revisions : * $Log: cgiedit.c,v $ + * Revision 1.52 2007/04/12 10:41:23 fabiankeil + * - Don't mistake VC++'s _snprintf() for a snprintf() replacement. + * - Move some cgi_edit_actions_for_url() variables into structs. + * - Remove bogus comment. + * * Revision 1.51 2007/04/08 13:21:05 fabiankeil * Reference action files in CGI URLs by id instead * of using the first part of the file name. @@ -493,22 +498,52 @@ struct editable_file }; /** - * Used by cgi_edit_actions_for_url() to replace filter related macros. + * Information about the filter types. + * Used for macro replacement in cgi_edit_actions_for_url. */ -struct cgi_filter_info +struct filter_type_info { const int multi_action_index; /**< The multi action index as defined in project.h */ - char *prepared_templates; /**< Temporary space for the filled-in templates for - this filter. Once all templated are aggregated - they replace the @$filtername-params@ macro. */ + const char *macro_name; /**< Name of the macro that has to be replaced + with the prepared templates. + For example "content-filter-params" */ const char *type; /**< Name of the filter type, for example "server-header-filter". */ - const char *abbr_type; /**< Abbreviation of the filter type, - usually the first character capitalized */ + const char *abbr_type; /**< Abbreviation of the filter type, usually the + first or second character capitalized */ const char *anchor; /**< Anchor for the User Manual link, for example "SERVER-HEADER-FILTER" */ }; +/* Accessed by index, keep the order in the way the FT_ macros are defined. 
*/ +const static struct filter_type_info filter_type_info[] = +{ + { + ACTION_MULTI_FILTER, + "content-filter-params", "filter", + "F", "FILTER" + }, + { + ACTION_MULTI_CLIENT_HEADER_FILTER, + "client-header-filter-params", "client-header-filter", + "C", "CLIENT-HEADER-FILTER" + }, + { + ACTION_MULTI_SERVER_HEADER_FILTER, + "server-header-filter-params", "server-header-filter", + "S", "SERVER-HEADER-FILTER" + }, + { + ACTION_MULTI_CLIENT_HEADER_TAGGER, + "client-header-tagger-params", "client-header-tagger", + "L", "CLIENT-HEADER-TAGGER" + }, + { + ACTION_MULTI_SERVER_HEADER_TAGGER, + "server-header-tagger-params", "server-header-tagger", + "E", "SERVER-HEADER-TAGGER" + }, +}; /* FIXME: Following non-static functions should be prototyped in .h or made static */ @@ -3154,23 +3189,12 @@ jb_err cgi_edit_actions_for_url(struct client_state *csp, */ char *filter_template; int filter_identifier = 0; - /* XXX: Should we put these into an array? */ - static struct cgi_filter_info content_filter = { - ACTION_MULTI_FILTER, NULL, - "filter", "F", "FILTER" - }; - static struct cgi_filter_info server_header_filter = { - ACTION_MULTI_SERVER_HEADER_FILTER, NULL, - "server-header-filter", "S", "SERVER-HEADER-FILTER" - }; - static struct cgi_filter_info client_header_filter = { - ACTION_MULTI_CLIENT_HEADER_FILTER, NULL, - "client-header-filter", "C", "CLIENT-HEADER-FILTER" - }; - - content_filter.prepared_templates = strdup(""); - server_header_filter.prepared_templates = strdup(""); - client_header_filter.prepared_templates = strdup(""); + char *prepared_templates[MAX_FILTER_TYPES]; + + for (i = 0; i < MAX_FILTER_TYPES; i++) + { + prepared_templates[i] = strdup(""); + } err = template_load(csp, &filter_template, "edit-actions-for-url-filter", 0); if (err) @@ -3193,32 +3217,14 @@ jb_err cgi_edit_actions_for_url(struct client_state *csp, filter_group = csp->rlist[i]->f; for (;(!err) && (filter_group != NULL); filter_group = filter_group->next) { - int multi_action_index; char 
current_mode = 'x'; char number[20]; struct list_entry *filter_name; struct map *line_exports; - struct cgi_filter_info *current_filter = NULL; + const int type = filter_group->type; + const int multi_action_index = filter_type_info[type].multi_action_index; - switch (filter_group->type) - { - case FT_CONTENT_FILTER: - current_filter = &content_filter; - break; - case FT_SERVER_HEADER_FILTER: - current_filter = &server_header_filter; - break; - case FT_CLIENT_HEADER_FILTER: - current_filter = &client_header_filter; - break; - default: - log_error(LOG_LEVEL_FATAL, - "cgi_edit_actions_for_url: Unknown filter type: %u for filter %s.", - filter_group->type, filter_group->name); - /* Not reached. */ - } - assert(current_filter != NULL); - multi_action_index = current_filter->multi_action_index; + assert(type < MAX_FILTER_TYPES); filter_name = cur_line->data.action->multi_add[multi_action_index]->first; while ((filter_name != NULL) @@ -3253,7 +3259,6 @@ jb_err cgi_edit_actions_for_url(struct client_state *csp, if (line_exports == NULL) { err = JB_ERR_MEMORY; - freez(current_filter->prepared_templates); /* XXX: really necessary? 
*/ } else { @@ -3263,9 +3268,9 @@ jb_err cgi_edit_actions_for_url(struct client_state *csp, if (!err) err = map(line_exports, "name", 1, filter_group->name, 1); if (!err) err = map(line_exports, "description", 1, filter_group->description, 1); if (!err) err = map_radio(line_exports, "this-filter", "ynx", current_mode); - if (!err) err = map(line_exports, "filter-type", 1, current_filter->type, 1); - if (!err) err = map(line_exports, "abbr-filter-type", 1, current_filter->abbr_type, 1); - if (!err) err = map(line_exports, "anchor", 1, current_filter->anchor, 1); + if (!err) err = map(line_exports, "filter-type", 1, filter_type_info[type].type, 1); + if (!err) err = map(line_exports, "abbr-filter-type", 1, filter_type_info[type].abbr_type, 1); + if (!err) err = map(line_exports, "anchor", 1, filter_type_info[type].anchor, 1); if (!err) { @@ -3273,7 +3278,7 @@ jb_err cgi_edit_actions_for_url(struct client_state *csp, if (filter_line == NULL) err = JB_ERR_MEMORY; } if (!err) err = template_fill(&filter_line, line_exports); - string_join(¤t_filter->prepared_templates, filter_line); + string_join(&prepared_templates[type], filter_line); free_map(line_exports); } @@ -3282,14 +3287,20 @@ jb_err cgi_edit_actions_for_url(struct client_state *csp, } freez(filter_template); - if (!err) err = map(exports, "content-filter-params", 1, content_filter.prepared_templates, 0); - if (!err) err = map(exports, "server-header-filter-params", 1, server_header_filter.prepared_templates, 0); - if (!err) err = map(exports, "client-header-filter-params", 1, client_header_filter.prepared_templates, 0); + /* Replace all filter macros with the aggregated templates */ + for (i = 0; i < MAX_FILTER_TYPES; i++) + { + if (err) break; + err = map(exports, filter_type_info[i].macro_name, 1, prepared_templates[i], 0); + } + if (err) { - freez(content_filter.prepared_templates); - freez(server_header_filter.prepared_templates); - freez(client_header_filter.prepared_templates); + /* Free aggregated 
templates */ + for (i = 0; i < MAX_FILTER_TYPES; i++) + { + freez(prepared_templates[i]); + } } } @@ -3472,6 +3483,12 @@ jb_err cgi_edit_actions_submit(struct client_state *csp, case 'C': multi_action_index = ACTION_MULTI_CLIENT_HEADER_FILTER; break; + case 'L': + multi_action_index = ACTION_MULTI_CLIENT_HEADER_TAGGER; + break; + case 'E': + multi_action_index = ACTION_MULTI_SERVER_HEADER_TAGGER; + break; default: log_error(LOG_LEVEL_ERROR, "Unknown filter type: %c for filter %s. Filter ignored.", type, name); diff --git a/jcc.c b/jcc.c index 056a6c1e..8c1f667e 100644 --- a/jcc.c +++ b/jcc.c @@ -1,4 +1,4 @@ -const char jcc_rcs[] = "$Id: jcc.c,v 1.127 2007/03/20 13:53:17 fabiankeil Exp $"; +const char jcc_rcs[] = "$Id: jcc.c,v 1.128 2007/03/25 16:55:54 fabiankeil Exp $"; /********************************************************************* * * File : $Source: /cvsroot/ijbswa/current/jcc.c,v $ @@ -33,6 +33,9 @@ const char jcc_rcs[] = "$Id: jcc.c,v 1.127 2007/03/20 13:53:17 fabiankeil Exp $" * * Revisions : * $Log: jcc.c,v $ + * Revision 1.128 2007/03/25 16:55:54 fabiankeil + * Don't CLF-log CONNECT requests twice. + * * Revision 1.127 2007/03/20 13:53:17 fabiankeil * Log the source address for ACL-related connection drops. 
* @@ -1867,6 +1870,13 @@ static void chat(struct client_state *csp) enlist(csp->action->multi[ACTION_MULTI_WAFER], VANILLA_WAFER); } + hdr = sed(client_patterns, add_client_headers, csp); + if (hdr == NULL) + { + /* FIXME Should handle error properly */ + log_error(LOG_LEVEL_FATAL, "Out of memory parsing client header"); + } + csp->flags |= CSP_FLAG_CLIENT_HEADER_PARSING_DONE; #ifdef FEATURE_KILL_POPUPS block_popups = ((csp->action->flags & ACTION_NO_POPUPS) != 0); @@ -1923,14 +1933,10 @@ static void chat(struct client_state *csp) csp->flags |= CSP_FLAG_REJECTED; #endif /* def FEATURE_STATISTICS */ - return; - } + freez(hdr); + list_remove_all(csp->headers); - hdr = sed(client_patterns, add_client_headers, csp); - if (hdr == NULL) - { - /* FIXME Should handle error properly */ - log_error(LOG_LEVEL_FATAL, "Out of memory parsing client header"); + return; } list_remove_all(csp->headers); diff --git a/loaders.c b/loaders.c index 17973bf1..422b2aed 100644 --- a/loaders.c +++ b/loaders.c @@ -1,4 +1,4 @@ -const char loaders_rcs[] = "$Id: loaders.c,v 1.59 2007/01/25 13:38:20 fabiankeil Exp $"; +const char loaders_rcs[] = "$Id: loaders.c,v 1.60 2007/03/20 15:16:34 fabiankeil Exp $"; /********************************************************************* * * File : $Source: /cvsroot/ijbswa/current/loaders.c,v $ @@ -35,6 +35,11 @@ const char loaders_rcs[] = "$Id: loaders.c,v 1.59 2007/01/25 13:38:20 fabiankeil * * Revisions : * $Log: loaders.c,v $ + * Revision 1.60 2007/03/20 15:16:34 fabiankeil + * Use dedicated header filter actions instead of abusing "filter". + * Replace "filter-client-headers" and "filter-client-headers" + * with "server-header-filter" and "client-header-filter". + * * Revision 1.59 2007/01/25 13:38:20 fabiankeil * Freez csp->error_message in sweep(). 
* @@ -379,6 +384,10 @@ static struct file_list *current_re_filterfile[MAX_AF_FILES] = { NULL, NULL, NULL, NULL, NULL }; +/* + * Pseudo filter type for load_one_re_filterfile + */ +#define NO_NEW_FILTER -1 /********************************************************************* @@ -486,6 +495,7 @@ void sweep(void) free_http_request(csp->http); destroy_list(csp->headers); + destroy_list(csp->tags); destroy_list(csp->cookie_list); free_current_action(csp->action); @@ -1423,7 +1433,7 @@ int load_one_re_filterfile(struct client_state *csp, int fileid) */ while (read_config_line(buf, sizeof(buf), fp, &linenum) != NULL) { - int new_filter = 0; + int new_filter = NO_NEW_FILTER; if (strncmp(buf, "FILTER:", 7) == 0) { @@ -1437,12 +1447,20 @@ int load_one_re_filterfile(struct client_state *csp, int fileid) { new_filter = FT_CLIENT_HEADER_FILTER; } + else if (strncmp(buf, "CLIENT-HEADER-TAGGER:", 21) == 0) + { + new_filter = FT_CLIENT_HEADER_TAGGER; + } + else if (strncmp(buf, "SERVER-HEADER-TAGGER:", 21) == 0) + { + new_filter = FT_SERVER_HEADER_TAGGER; + } /* * If this is the head of a new filter block, make it a * re_filterfile spec of its own and chain it to the list: */ - if (new_filter != 0) + if (new_filter != NO_NEW_FILTER) { new_bl = (struct re_filterfile_spec *)zalloc(sizeof(*bl)); if (new_bl == NULL) diff --git a/parsers.c b/parsers.c index cf2eea8f..14ce3b6b 100644 --- a/parsers.c +++ b/parsers.c @@ -1,4 +1,4 @@ -const char parsers_rcs[] = "$Id: parsers.c,v 1.95 2007/03/25 14:26:40 fabiankeil Exp $"; +const char parsers_rcs[] = "$Id: parsers.c,v 1.96 2007/04/12 12:53:58 fabiankeil Exp $"; /********************************************************************* * * File : $Source: /cvsroot/ijbswa/current/parsers.c,v $ @@ -44,6 +44,11 @@ const char parsers_rcs[] = "$Id: parsers.c,v 1.95 2007/03/25 14:26:40 fabiankeil * * Revisions : * $Log: parsers.c,v $ + * Revision 1.96 2007/04/12 12:53:58 fabiankeil + * Log a warning if the content is compressed, filtering is + * 
enabled and Privoxy was compiled without zlib support. + * Closes FR#1673938. + * * Revision 1.95 2007/03/25 14:26:40 fabiankeil * - Fix warnings when compiled with glibc. * - Don't use crumble() for cookie crunching. @@ -688,6 +693,7 @@ const char parsers_rcs[] = "$Id: parsers.c,v 1.95 2007/03/25 14:26:40 fabiankeil #include "jbsockets.h" #include "miscutil.h" #include "list.h" +#include "actions.h" #ifndef HAVE_STRPTIME #include "strptime.h" @@ -697,7 +703,7 @@ const char parsers_h_rcs[] = PARSERS_H_VERSION; /* Fix a problem with Solaris. There should be no effect on other * platforms. - * Solaris's isspace() is a macro which uses it's argument directly + * Solaris's isspace() is a macro which uses its argument directly * as an array index. Therefore we need to make sure that high-bit * characters generate +ve values, and ideally we also want to make * the argument match the declared parameter type of "int". @@ -708,6 +714,8 @@ const char parsers_h_rcs[] = PARSERS_H_VERSION; #define ijb_isupper(__X) isupper((int)(unsigned char)(__X)) #define ijb_tolower(__X) tolower((int)(unsigned char)(__X)) +jb_err header_tagger(struct client_state *csp, char *header); +jb_err scan_headers(struct client_state *csp); const struct parsers client_patterns[] = { { "referer:", 8, client_referrer }, @@ -1365,6 +1373,40 @@ char *get_header_value(const struct list *header_list, const char *header_name) } + +/********************************************************************* + * + * Function : scan_headers + * + * Description : Scans headers, applies tags and updates action bits. + * + * Parameters : + * 1 : csp = Current client state (buffers, headers, etc...) 
+ * + * Returns : JB_ERR_OK + * + *********************************************************************/ +jb_err scan_headers(struct client_state *csp) +{ + struct list_entry *h; /* Header */ + jb_err err = JB_ERR_OK; + + log_error(LOG_LEVEL_HEADER, "scanning headers for: %s", csp->http->url); + + for (h = csp->headers->first; (err == JB_ERR_OK) && (h != NULL) ; h = h->next) + { + /* Header crunch()ed in previous run? -> ignore */ + if (h->str == NULL) continue; + log_error(LOG_LEVEL_HEADER, "scan: %s", h->str); + err = header_tagger(csp, h->str); + } + + update_action_bits(csp); + + return err; +} + + /********************************************************************* * * Function : sed @@ -1405,7 +1447,8 @@ char *sed(const struct parsers pats[], if (first_run) /* Parse and print */ { - log_error(LOG_LEVEL_HEADER, "scanning headers for: %s", csp->http->url); + scan_headers(csp); + for (v = pats; (err == JB_ERR_OK) && (v->str != NULL) ; v++) { for (p = csp->headers->first; (err == JB_ERR_OK) && (p != NULL) ; p = p->next) @@ -1413,8 +1456,6 @@ char *sed(const struct parsers pats[], /* Header crunch()ed in previous run? -> ignore */ if (p->str == NULL) continue; - if (v == pats) log_error(LOG_LEVEL_HEADER, "scan: %s", p->str); - /* Does the current parser handle this header? */ if ((strncmpic(p->str, v->str, v->len) == 0) || (v->len == CHECK_EVERY_HEADER_REMAINING)) { @@ -1463,6 +1504,171 @@ char *sed(const struct parsers pats[], } + +/********************************************************************* + * + * Function : header_tagger + * + * Description : Executes all text substitutions from applying + * tag actions and saves the result as tag. + * + * XXX: Shares enough code with filter_header() and + * pcrs_filter_response() to warrant some helper functions. + * + * Parameters : + * 1 : csp = Current client state (buffers, headers, etc...) 
+ * 2 : header = Header that is used as tagger input + * + * Returns : JB_ERR_OK on success and always succeeds + * + *********************************************************************/ +jb_err header_tagger(struct client_state *csp, char *header) +{ + int wanted_filter_type; + int multi_action_index; + int i; + pcrs_job *job; + + struct file_list *fl; + struct re_filterfile_spec *b; + struct list_entry *tag_name; + + int found_filters = 0; + const size_t header_length = strlen(header); + + if (csp->flags & CSP_FLAG_CLIENT_HEADER_PARSING_DONE) + { + wanted_filter_type = FT_SERVER_HEADER_TAGGER; + multi_action_index = ACTION_MULTI_SERVER_HEADER_TAGGER; + } + else + { + wanted_filter_type = FT_CLIENT_HEADER_TAGGER; + multi_action_index = ACTION_MULTI_CLIENT_HEADER_TAGGER; + } + + /* Check if there are any filters */ + for (i = 0; i < MAX_AF_FILES; i++) + { + fl = csp->rlist[i]; + if (NULL != fl) + { + if (NULL != fl->f) + { + found_filters = 1; + break; + } + } + } + + if (0 == found_filters) + { + log_error(LOG_LEVEL_ERROR, "Unable to get current state of regex tagging."); + return(JB_ERR_OK); + } + + for (i = 0; i < MAX_AF_FILES; i++) + { + fl = csp->rlist[i]; + if ((NULL == fl) || (NULL == fl->f)) + { + /* + * Either there are no filter files + * left, or this filter file just + * contains no valid filters. + * + * Continue to be sure we don't miss + * valid filter files that are chained + * after empty or invalid ones. 
+ */ + continue; + } + + /* For all filters, */ + for (b = fl->f; b; b = b->next) + { + if (b->type != wanted_filter_type) + { + /* skip the ones we don't care about, */ + continue; + } + /* leaving only taggers that could apply, of which we use the ones, */ + for (tag_name = csp->action->multi[multi_action_index]->first; + NULL != tag_name; tag_name = tag_name->next) + { + /* that do apply, and */ + if (strcmp(b->name, tag_name->str) == 0) + { + char *modified_tag = NULL; + char *tag = header; + size_t size = header_length; + + if (NULL == b->joblist) + { + log_error(LOG_LEVEL_RE_FILTER, + "Tagger %s has empty joblist. Nothing to do.", b->name); + continue; + } + + /* execute their pcrs_joblist on the header. */ + for (job = b->joblist; NULL != job; job = job->next) + { + const int hits = pcrs_execute(job, tag, size, &modified_tag, &size); + + if (0 < hits) + { + /* Success, continue with the modified version. */ + if (tag != header) + { + freez(tag); + } + tag = modified_tag; + } + else + { + /* Tagger doesn't match */ + if (0 > hits) + { + /* Regex failure, log it but continue anyway. */ + log_error(LOG_LEVEL_ERROR, + "Problems with tagger \'%s\' and header \'%s\': %s", + b->name, header, pcrs_strerror(hits)); + } + freez(modified_tag); + } + } + + /* If this tagger matched */ + if (tag != header) + { + /* and there is something left to save, */ + if (0 < size) + { + /* enlist a unique version of it as tag. 
*/ + if (JB_ERR_OK != enlist_unique(csp->tags, tag, 0)) + { + log_error(LOG_LEVEL_ERROR, + "Insufficient memory to add tag \'%s\', " + "based on tagger \'%s\' and header \'%s\'", + tag, b->name, header); + } + else + { + log_error(LOG_LEVEL_HEADER, + "Adding tag \'%s\' created by header tagger \'%s\'", + tag, b->name); + } + } + freez(tag); + } + } /* if the tagger applies */ + } /* for every tagger that could apply */ + } /* for all filters */ + } /* for all filter files */ + + return JB_ERR_OK; +} + /* here begins the family of parser functions that reformat header lines */ /********************************************************************* @@ -3415,9 +3621,6 @@ jb_err connection_close_adder(struct client_state *csp) *********************************************************************/ jb_err server_http(struct client_state *csp, char **header) { - /* Signal that were now parsing server headers. */ - csp->flags |= CSP_FLAG_CLIENT_HEADER_PARSING_DONE; - sscanf(*header, "HTTP/%*d.%*d %d", &(csp->http->status)); if (csp->http->status == 206) { diff --git a/project.h b/project.h index d3cde777..f52b5d65 100644 --- a/project.h +++ b/project.h @@ -1,7 +1,7 @@ #ifndef PROJECT_H_INCLUDED #define PROJECT_H_INCLUDED /** Version string. */ -#define PROJECT_H_VERSION "$Id: project.h,v 1.92 2007/03/17 15:20:05 fabiankeil Exp $" +#define PROJECT_H_VERSION "$Id: project.h,v 1.93 2007/03/20 15:16:34 fabiankeil Exp $" /********************************************************************* * * File : $Source: /cvsroot/ijbswa/current/project.h,v $ @@ -37,6 +37,11 @@ * * Revisions : * $Log: project.h,v $ + * Revision 1.93 2007/03/20 15:16:34 fabiankeil + * Use dedicated header filter actions instead of abusing "filter". + * Replace "filter-client-headers" and "filter-client-headers" + * with "server-header-filter" and "client-header-filter". + * * Revision 1.92 2007/03/17 15:20:05 fabiankeil * New config option: enforce-blocks. 
* @@ -857,7 +862,7 @@ struct http_response }; /** - * A URL pattern. + * A URL or a tag pattern. */ struct url_spec { @@ -875,12 +880,13 @@ struct url_spec char *path; /**< The source for the regex. */ size_t pathlen; /**< ==strlen(path). Needed for prefix matching. FIXME: Now obsolete? */ regex_t *preg; /**< Regex for matching path part */ + regex_t *tag_regex; /**< Regex for matching tags */ }; /** * If you declare a static url_spec, this is the value to initialize it to zero. */ -#define URL_SPEC_INITIALIZER { NULL, NULL, NULL, 0, 0, 0, NULL, 0, NULL } +#define URL_SPEC_INITIALIZER { NULL, NULL, NULL, 0, 0, 0, NULL, 0, NULL, NULL } /** * Constant for host part matching in URLs. If set, indicates that the start of @@ -1064,8 +1070,12 @@ struct iob #define ACTION_MULTI_SERVER_HEADER_FILTER 3 /** Index into current_action_spec::multi[] for client-header filters to apply. */ #define ACTION_MULTI_CLIENT_HEADER_FILTER 4 +/** Index into current_action_spec::multi[] for client-header tags to apply. */ +#define ACTION_MULTI_CLIENT_HEADER_TAGGER 5 +/** Index into current_action_spec::multi[] for server-header tags to apply. */ +#define ACTION_MULTI_SERVER_HEADER_TAGGER 6 /** Number of multi-string actions. 
*/ -#define ACTION_MULTI_COUNT 5 +#define ACTION_MULTI_COUNT 7 /** @@ -1263,6 +1273,9 @@ struct client_state /** List of all headers for this request */ struct list headers[1]; + /** List of all tags that apply to this request */ + struct list tags[1]; + /** List of all cookies for this request */ struct list cookie_list[1]; @@ -1452,9 +1465,13 @@ struct forward_spec #define FORWARD_SPEC_INITIALIZER { { URL_SPEC_INITIALIZER }, 0, NULL, 0, NULL, 0, NULL } /* Supported filter types */ -#define FT_CONTENT_FILTER 1 -#define FT_CLIENT_HEADER_FILTER 2 -#define FT_SERVER_HEADER_FILTER 3 +#define FT_CONTENT_FILTER 0 +#define FT_CLIENT_HEADER_FILTER 1 +#define FT_SERVER_HEADER_FILTER 2 +#define FT_CLIENT_HEADER_TAGGER 3 +#define FT_SERVER_HEADER_TAGGER 4 + +#define MAX_FILTER_TYPES 5 /** * This struct represents one filter (one block) from diff --git a/templates/edit-actions-for-url b/templates/edit-actions-for-url index 441596f0..98847a4c 100644 --- a/templates/edit-actions-for-url +++ b/templates/edit-actions-for-url @@ -32,6 +32,10 @@ # # Revisions : # $Log: edit-actions-for-url,v $ +# Revision 1.43 2007/03/29 11:40:34 fabiankeil +# Divide @filter-params@ into @client-header-filter-params@ +# @content-filter-params@ and @server-header-filter-params@. +# # Revision 1.42 2007/03/20 15:40:00 fabiankeil # Adjust to new world order with dedicated header-filter actions. 
# @@ -515,6 +519,7 @@ function show_send_wafer_opts(tf) @client-header-filter-params@ +@client-header-tagger-params@ @server-header-filter-params@ +@server-header-tagger-params@ spec, 4)) + { + if (NULL == (url->tag_regex = zalloc(sizeof(*url->tag_regex)))) + { + freez(url->spec); + return JB_ERR_MEMORY; + } + + /* buf + 4 to skip "TAG:" */ + errcode = regcomp(url->tag_regex, buf + 4, (REG_EXTENDED|REG_NOSUB|REG_ICASE)); + if (errcode) + { + errlen = regerror(errcode, url->tag_regex, rebuf, sizeof(rebuf)); + if (errlen > (sizeof(rebuf) - 1)) + { + errlen = sizeof(rebuf) - 1; + } + rebuf[errlen] = '\0'; + + log_error(LOG_LEVEL_ERROR, "error compiling %s: %s", url->spec, rebuf); + + freez(url->spec); + regfree(url->tag_regex); + freez(url->tag_regex); + + return JB_ERR_PARSE; + } + return JB_ERR_OK; + } + + /* Only reached for URL patterns */ if ((p = strchr(buf, '/')) != NULL) { if (NULL == (url->path = strdup(p))) @@ -775,9 +813,6 @@ jb_err create_url_spec(struct url_spec * url, const char * buf) } if (url->path) { - int errcode; - char rebuf[BUFFER_SIZE]; - if (NULL == (url->preg = zalloc(sizeof(*url->preg)))) { freez(url->spec); @@ -791,8 +826,7 @@ jb_err create_url_spec(struct url_spec * url, const char * buf) (REG_EXTENDED|REG_NOSUB|REG_ICASE)); if (errcode) { - size_t errlen = regerror(errcode, - url->preg, rebuf, sizeof(rebuf)); + errlen = regerror(errcode, url->preg, rebuf, sizeof(rebuf)); if (errlen > (sizeof(rebuf) - (size_t)1)) { @@ -934,6 +968,11 @@ void free_url_spec(struct url_spec *url) regfree(url->preg); freez(url->preg); } + if (url->tag_regex) + { + regfree(url->tag_regex); + freez(url->tag_regex); + } } @@ -947,12 +986,18 @@ void free_url_spec(struct url_spec *url) * 1 : pattern = a URL pattern * 2 : url = URL to match * - * Returns : 0 iff the URL matches the pattern, else nonzero. + * Returns : Nonzero if the URL matches the pattern, else 0. 
* *********************************************************************/ int url_match(const struct url_spec *pattern, const struct http_request *url) { + if (pattern->tag_regex != NULL) + { + /* It's a tag pattern and shouldn't be matched against URLs */ + return 0; + } + return ((pattern->port == 0) || (pattern->port == url->port)) && ((pattern->dbuffer == NULL) || (domain_match(pattern, url) == 0)) && ((pattern->path == NULL) ||