Introduce tags as alternative way to specify which
authorFabian Keil <fk@fabiankeil.de>
Sun, 15 Apr 2007 16:39:21 +0000 (16:39 +0000)
committerFabian Keil <fk@fabiankeil.de>
Sun, 15 Apr 2007 16:39:21 +0000 (16:39 +0000)
actions apply to a request. At the moment tags can be
created based on client and server headers.

actionlist.h
actions.c
actions.h
cgiedit.c
jcc.c
loaders.c
parsers.c
project.h
templates/edit-actions-for-url
urlmatch.c

index 3d36d66..c4d19b6 100644 (file)
  *
  * Revisions   :
  *    $Log: actionlist.h,v $
+ *    Revision 1.24  2007/03/20 15:16:34  fabiankeil
+ *    Use dedicated header filter actions instead of abusing "filter".
+ *    Replace "filter-client-headers" and "filter-server-headers"
+ *    with "client-header-filter" and "server-header-filter".
+ *
  *    Revision 1.23  2006/10/09 10:26:18  fabiankeil
  *    Changed the path in set-image-blocker's redirection default to
  *    "send-banner?type=pattern" instead of "show-banner?type=pattern"
 DEFINE_ACTION_MULTI      ("add-header",                 ACTION_MULTI_ADD_HEADER)
 DEFINE_ACTION_BOOL       ("block",                      ACTION_BLOCK)
 DEFINE_ACTION_MULTI      ("client-header-filter",       ACTION_MULTI_CLIENT_HEADER_FILTER)
+DEFINE_ACTION_MULTI      ("client-header-tagger",       ACTION_MULTI_CLIENT_HEADER_TAGGER)
 DEFINE_ACTION_STRING     ("content-type-overwrite",     ACTION_CONTENT_TYPE_OVERWRITE, ACTION_STRING_CONTENT_TYPE)
 DEFINE_CGI_PARAM_NO_RADIO("content-type-overwrite",     ACTION_CONTENT_TYPE_OVERWRITE, ACTION_STRING_CONTENT_TYPE,    "text/html")
 DEFINE_ACTION_STRING     ("crunch-client-header",       ACTION_CRUNCH_CLIENT_HEADER, ACTION_STRING_CLIENT_HEADER)
@@ -203,6 +209,7 @@ DEFINE_CGI_PARAM_NO_RADIO("redirect",                   ACTION_REDIRECT,
 DEFINE_ACTION_BOOL       ("send-vanilla-wafer",         ACTION_VANILLA_WAFER)
 DEFINE_ACTION_MULTI      ("send-wafer",                 ACTION_MULTI_WAFER)
 DEFINE_ACTION_MULTI      ("server-header-filter",       ACTION_MULTI_SERVER_HEADER_FILTER)
+DEFINE_ACTION_MULTI      ("server-header-tagger",       ACTION_MULTI_SERVER_HEADER_TAGGER)
 DEFINE_ACTION_BOOL       ("session-cookies-only",       ACTION_NO_COOKIE_KEEP)
 DEFINE_ACTION_STRING     ("set-image-blocker",          ACTION_IMAGE_BLOCKER,   ACTION_STRING_IMAGE_BLOCKER)
 DEFINE_CGI_PARAM_RADIO   ("set-image-blocker",          ACTION_IMAGE_BLOCKER,   ACTION_STRING_IMAGE_BLOCKER, "pattern", 1)
index 5387251..56706e4 100644 (file)
--- a/actions.c
+++ b/actions.c
@@ -1,4 +1,4 @@
-const char actions_rcs[] = "$Id: actions.c,v 1.36 2006/12/28 17:15:42 fabiankeil Exp $";
+const char actions_rcs[] = "$Id: actions.c,v 1.37 2007/03/11 15:56:12 fabiankeil Exp $";
 /*********************************************************************
  *
  * File        :  $Source: /cvsroot/ijbswa/current/actions.c,v $
@@ -33,6 +33,9 @@ const char actions_rcs[] = "$Id: actions.c,v 1.36 2006/12/28 17:15:42 fabiankeil
  *
  * Revisions   :
  *    $Log: actions.c,v $
+ *    Revision 1.37  2007/03/11 15:56:12  fabiankeil
+ *    Add kludge to log unknown aliases and actions before exiting.
+ *
  *    Revision 1.36  2006/12/28 17:15:42  fabiankeil
  *    Fix gcc43 conversion warning.
  *
@@ -868,6 +871,67 @@ jb_err merge_current_action (struct current_action_spec *dest,
 }
 
 
+/*********************************************************************
+ *
+ * Function    :  update_action_bits
+ *
+ * Description :  Merges the actions of every action file entry whose
+ *                tag pattern matches one of csp->tags into csp->action.
+ *
+ * Parameters  :
+ *          1  :  csp = Current client state (buffers, headers, etc...)
+ *
+ * Returns     :  1 if at least one tag pattern matched, 0 otherwise.
+ *
+ *********************************************************************/
+int update_action_bits(struct client_state *csp)
+{
+   struct file_list *fl;
+   struct url_actions *b;
+   struct list_entry *tag;
+   int updated = 0;
+   int i;
+
+   /* Take each tag, */
+   for (tag = csp->tags->first; tag != NULL; tag = tag->next)
+   {
+      /* run through all action files, */
+      for (i = 0; i < MAX_AF_FILES; i++)
+      {
+         if (((fl = csp->actions_list[i]) == NULL) || ((b = fl->f) == NULL))
+         {
+            /* Skip empty files */
+            continue;
+         }
+         /* and through all the action patterns, */
+         for (b = b->next; NULL != b; b = b->next)
+         {
+            /* skip the URL patterns, */
+            if (NULL == b->url->tag_regex)
+            {
+               continue;
+            }
+
+            /* and check if one of the tag patterns matches this tag, */
+            if (0 == regexec(b->url->tag_regex, tag->str, 0, NULL, 0))
+            {
+               /* if it does, update the action bit map, */
+               if (merge_current_action(csp->action, b->action))
+               {
+                  log_error(LOG_LEVEL_ERROR,
+                     "Out of memory while changing action bits");
+               }
+               /* and signal the change (even if the merge failed). */
+               updated = 1;
+            }
+         }
+      }
+   }
+
+   return updated;
+}
+
+
 /*********************************************************************
  *
  * Function    :  free_current_action
@@ -1479,7 +1543,7 @@ static int load_one_actions_file(struct client_state *csp, int fileid)
  * Function    :  actions_to_text
  *
  * Description :  Converts a actionsfile entry from the internal
- *                structurt into a text line.  The output is split
+ *                structure into a text line.  The output is split
  *                into one line for each action with line continuation. 
  *
  * Parameters  :
index 3f5c62d..0e97276 100644 (file)
--- a/actions.h
+++ b/actions.h
@@ -1,14 +1,14 @@
 #ifndef ACTIONS_H_INCLUDED
 #define ACTIONS_H_INCLUDED
-#define ACTIONS_H_VERSION "$Id: actions.h,v 1.12 2002/05/06 07:56:50 oes Exp $"
+#define ACTIONS_H_VERSION "$Id: actions.h,v 1.14 2006/07/18 14:48:45 david__schmidt Exp $"
 /*********************************************************************
  *
- * File        :  $Source: /cvsroot/ijbswa/current/Attic/actions.h,v $
+ * File        :  $Source: /cvsroot/ijbswa/current/actions.h,v $
  *
  * Purpose     :  Declares functions to work with actions files
  *                Functions declared include: FIXME
  *
- * Copyright   :  Written by and Copyright (C) 2001 the SourceForge
+ * Copyright   :  Written by and Copyright (C) 2001-2007 the SourceForge
  *                Privoxy team. http://www.privoxy.org/
  *
  *                Based on the Internet Junkbuster originally written
  *
  * Revisions   :
  *    $Log: actions.h,v $
+ *    Revision 1.14  2006/07/18 14:48:45  david__schmidt
+ *    Reorganizing the repository: swapping out what was HEAD (the old 3.1 branch)
+ *    with what was really the latest development (the v_3_0_branch branch)
+ *
  *    Revision 1.12  2002/05/06 07:56:50  oes
  *    Made actions_to_html independent of FEATURE_CGI_EDIT_ACTIONS
  *
@@ -122,6 +126,7 @@ extern void init_action(struct action_spec *dest);
 extern void free_action(struct action_spec *src);
 extern jb_err merge_actions (struct action_spec *dest, 
                              const struct action_spec *src);
+extern int update_action_bits(struct client_state *csp);
 extern jb_err copy_action (struct action_spec *dest, 
                            const struct action_spec *src);
 extern char * actions_to_text     (struct action_spec *action);
index 4caec74..ea57256 100644 (file)
--- a/cgiedit.c
+++ b/cgiedit.c
@@ -1,4 +1,4 @@
-const char cgiedit_rcs[] = "$Id: cgiedit.c,v 1.51 2007/04/08 13:21:05 fabiankeil Exp $";
+const char cgiedit_rcs[] = "$Id: cgiedit.c,v 1.52 2007/04/12 10:41:23 fabiankeil Exp $";
 /*********************************************************************
  *
  * File        :  $Source: /cvsroot/ijbswa/current/cgiedit.c,v $
@@ -42,6 +42,11 @@ const char cgiedit_rcs[] = "$Id: cgiedit.c,v 1.51 2007/04/08 13:21:05 fabiankeil
  *
  * Revisions   :
  *    $Log: cgiedit.c,v $
+ *    Revision 1.52  2007/04/12 10:41:23  fabiankeil
+ *    - Don't mistake VC++'s _snprintf() for a snprintf() replacement.
+ *    - Move some cgi_edit_actions_for_url() variables into structs.
+ *    - Remove bogus comment.
+ *
  *    Revision 1.51  2007/04/08 13:21:05  fabiankeil
  *    Reference action files in CGI URLs by id instead
  *    of using the first part of the file name.
@@ -493,22 +498,52 @@ struct editable_file
 };
 
 /**
- * Used by cgi_edit_actions_for_url() to replace filter related macros.
+ * Information about the filter types.
+ * Used for macro replacement in cgi_edit_actions_for_url.
  */
-struct cgi_filter_info
+struct filter_type_info
 {
    const int multi_action_index; /**< The multi action index as defined in project.h */
-   char *prepared_templates;     /**< Temporary space for the filled-in templates for
-                                      this filter. Once all templated are aggregated
-                                      they replace the @$filtername-params@ macro. */
+   const char *macro_name;       /**< Name of the macro that has to be replaced
+                                      with the prepared templates.
+                                      For example "content-filter-params" */
    const char *type;             /**< Name of the filter type,
                                       for example "server-header-filter". */
-   const char *abbr_type;        /**< Abbreviation of the filter type,
-                                      usually the first character capitalized */
+   const char *abbr_type;        /**< Abbreviation of the filter type, usually the
+                                      first or second character capitalized */
    const char *anchor;           /**< Anchor for the User Manual link,
                                       for example "SERVER-HEADER-FILTER"  */
 };
 
+/* Indexed by filter type: keep the entries in the order the FT_ macros are defined. */
+static const struct filter_type_info filter_type_info[] =
+{
+   {
+      ACTION_MULTI_FILTER,                                    /* multi_action_index */
+      "content-filter-params", "filter",                      /* macro_name, type   */
+      "F", "FILTER"                                           /* abbr_type, anchor  */
+   },
+   {
+      ACTION_MULTI_CLIENT_HEADER_FILTER,
+      "client-header-filter-params", "client-header-filter",
+      "C", "CLIENT-HEADER-FILTER"
+   },
+   {
+      ACTION_MULTI_SERVER_HEADER_FILTER,
+      "server-header-filter-params", "server-header-filter",
+      "S", "SERVER-HEADER-FILTER"
+   },
+   {
+      ACTION_MULTI_CLIENT_HEADER_TAGGER,
+      "client-header-tagger-params", "client-header-tagger",
+      "L", "CLIENT-HEADER-TAGGER"
+   },
+   {
+      ACTION_MULTI_SERVER_HEADER_TAGGER,
+      "server-header-tagger-params", "server-header-tagger",
+      "E", "SERVER-HEADER-TAGGER"
+   },
+};
 
 /* FIXME: Following non-static functions should be prototyped in .h or made static */
 
@@ -3154,23 +3189,12 @@ jb_err cgi_edit_actions_for_url(struct client_state *csp,
        */
       char *filter_template;
       int filter_identifier = 0;
-      /* XXX: Should we put these into an array? */
-      static struct cgi_filter_info content_filter = {
-        ACTION_MULTI_FILTER, NULL,
-         "filter", "F", "FILTER"
-      };
-      static struct cgi_filter_info server_header_filter = {
-         ACTION_MULTI_SERVER_HEADER_FILTER, NULL,
-         "server-header-filter", "S", "SERVER-HEADER-FILTER"
-      };
-      static struct cgi_filter_info client_header_filter = {
-         ACTION_MULTI_CLIENT_HEADER_FILTER, NULL,
-         "client-header-filter", "C", "CLIENT-HEADER-FILTER"
-      };
-
-      content_filter.prepared_templates = strdup("");
-      server_header_filter.prepared_templates = strdup("");
-      client_header_filter.prepared_templates = strdup("");
+      char *prepared_templates[MAX_FILTER_TYPES];
+
+      for (i = 0; i < MAX_FILTER_TYPES; i++)
+      {
+         prepared_templates[i] = strdup("");
+      }
 
       err = template_load(csp, &filter_template, "edit-actions-for-url-filter", 0);
       if (err)
@@ -3193,32 +3217,14 @@ jb_err cgi_edit_actions_for_url(struct client_state *csp,
             filter_group = csp->rlist[i]->f;
             for (;(!err) && (filter_group != NULL); filter_group = filter_group->next)
             {
-               int multi_action_index;
                char current_mode = 'x';
                char number[20];
                struct list_entry *filter_name;
                struct map *line_exports;
-               struct cgi_filter_info *current_filter = NULL;
+               const int type = filter_group->type;
+               const int multi_action_index = filter_type_info[type].multi_action_index;
 
-               switch (filter_group->type)
-               {
-                  case FT_CONTENT_FILTER:
-                     current_filter = &content_filter;
-                     break;
-                  case FT_SERVER_HEADER_FILTER:
-                     current_filter = &server_header_filter;
-                     break;
-                  case FT_CLIENT_HEADER_FILTER:
-                     current_filter = &client_header_filter;
-                     break;
-                  default:
-                     log_error(LOG_LEVEL_FATAL,
-                        "cgi_edit_actions_for_url: Unknown filter type: %u for filter %s.",
-                        filter_group->type, filter_group->name);
-                     /* Not reached. */
-               }
-               assert(current_filter != NULL);
-               multi_action_index = current_filter->multi_action_index;
+               assert(type < MAX_FILTER_TYPES);
 
                filter_name = cur_line->data.action->multi_add[multi_action_index]->first;
                while ((filter_name != NULL)
@@ -3253,7 +3259,6 @@ jb_err cgi_edit_actions_for_url(struct client_state *csp,
                if (line_exports == NULL)
                {
                   err = JB_ERR_MEMORY;
-                  freez(current_filter->prepared_templates); /* XXX: really necessary? */
                }
                else
                {
@@ -3263,9 +3268,9 @@ jb_err cgi_edit_actions_for_url(struct client_state *csp,
                   if (!err) err = map(line_exports, "name",  1, filter_group->name, 1);
                   if (!err) err = map(line_exports, "description",  1, filter_group->description, 1);
                   if (!err) err = map_radio(line_exports, "this-filter", "ynx", current_mode);
-                  if (!err) err = map(line_exports, "filter-type", 1, current_filter->type, 1);
-                  if (!err) err = map(line_exports, "abbr-filter-type", 1, current_filter->abbr_type, 1);
-                  if (!err) err = map(line_exports, "anchor", 1, current_filter->anchor, 1);
+                  if (!err) err = map(line_exports, "filter-type", 1, filter_type_info[type].type, 1);
+                  if (!err) err = map(line_exports, "abbr-filter-type", 1, filter_type_info[type].abbr_type, 1);
+                  if (!err) err = map(line_exports, "anchor", 1, filter_type_info[type].anchor, 1);
 
                   if (!err)
                   {
@@ -3273,7 +3278,7 @@ jb_err cgi_edit_actions_for_url(struct client_state *csp,
                      if (filter_line == NULL) err = JB_ERR_MEMORY;
                   }
                   if (!err) err = template_fill(&filter_line, line_exports);
-                  string_join(&current_filter->prepared_templates, filter_line);
+                  string_join(&prepared_templates[type], filter_line);
 
                   free_map(line_exports);
                }
@@ -3282,14 +3287,20 @@ jb_err cgi_edit_actions_for_url(struct client_state *csp,
       }
       freez(filter_template);
 
-      if (!err) err = map(exports, "content-filter-params", 1, content_filter.prepared_templates, 0);
-      if (!err) err = map(exports, "server-header-filter-params", 1, server_header_filter.prepared_templates, 0);
-      if (!err) err = map(exports, "client-header-filter-params", 1, client_header_filter.prepared_templates, 0);
+      /* Replace all filter macros with the aggregated templates */
+      for (i = 0; i < MAX_FILTER_TYPES; i++)
+      {
+         if (err) break;
+         err = map(exports, filter_type_info[i].macro_name, 1, prepared_templates[i], 0);
+      }
+
       if (err)
       {
-         freez(content_filter.prepared_templates);
-         freez(server_header_filter.prepared_templates);
-         freez(client_header_filter.prepared_templates);
+         /* Free aggregated templates */
+         for (i = 0; i < MAX_FILTER_TYPES; i++)
+         {
+            freez(prepared_templates[i]);
+         }
       }
    }
 
@@ -3472,6 +3483,12 @@ jb_err cgi_edit_actions_submit(struct client_state *csp,
          case 'C':
             multi_action_index = ACTION_MULTI_CLIENT_HEADER_FILTER;
             break;
+         case 'L':
+            multi_action_index = ACTION_MULTI_CLIENT_HEADER_TAGGER;
+            break;
+         case 'E':
+            multi_action_index = ACTION_MULTI_SERVER_HEADER_TAGGER;
+            break;
          default:
             log_error(LOG_LEVEL_ERROR,
                "Unknown filter type: %c for filter %s. Filter ignored.", type, name);
diff --git a/jcc.c b/jcc.c
index 056a6c1..8c1f667 100644 (file)
--- a/jcc.c
+++ b/jcc.c
@@ -1,4 +1,4 @@
-const char jcc_rcs[] = "$Id: jcc.c,v 1.127 2007/03/20 13:53:17 fabiankeil Exp $";
+const char jcc_rcs[] = "$Id: jcc.c,v 1.128 2007/03/25 16:55:54 fabiankeil Exp $";
 /*********************************************************************
  *
  * File        :  $Source: /cvsroot/ijbswa/current/jcc.c,v $
@@ -33,6 +33,9 @@ const char jcc_rcs[] = "$Id: jcc.c,v 1.127 2007/03/20 13:53:17 fabiankeil Exp $"
  *
  * Revisions   :
  *    $Log: jcc.c,v $
+ *    Revision 1.128  2007/03/25 16:55:54  fabiankeil
+ *    Don't CLF-log CONNECT requests twice.
+ *
  *    Revision 1.127  2007/03/20 13:53:17  fabiankeil
  *    Log the source address for ACL-related connection drops.
  *
@@ -1867,6 +1870,13 @@ static void chat(struct client_state *csp)
       enlist(csp->action->multi[ACTION_MULTI_WAFER], VANILLA_WAFER);
    }
 
+   hdr = sed(client_patterns, add_client_headers, csp);
+   if (hdr == NULL)
+   {
+      /* FIXME Should handle error properly */
+      log_error(LOG_LEVEL_FATAL, "Out of memory parsing client header");
+   }
+   csp->flags |= CSP_FLAG_CLIENT_HEADER_PARSING_DONE;
 
 #ifdef FEATURE_KILL_POPUPS
    block_popups               = ((csp->action->flags & ACTION_NO_POPUPS) != 0);
@@ -1923,14 +1933,10 @@ static void chat(struct client_state *csp)
       csp->flags |= CSP_FLAG_REJECTED;
 #endif /* def FEATURE_STATISTICS */
 
-      return;
-   }
+      freez(hdr);
+      list_remove_all(csp->headers);
 
-   hdr = sed(client_patterns, add_client_headers, csp);
-   if (hdr == NULL)
-   {
-      /* FIXME Should handle error properly */
-      log_error(LOG_LEVEL_FATAL, "Out of memory parsing client header");
+      return;
    }
 
    list_remove_all(csp->headers);
index 17973bf..422b2ae 100644 (file)
--- a/loaders.c
+++ b/loaders.c
@@ -1,4 +1,4 @@
-const char loaders_rcs[] = "$Id: loaders.c,v 1.59 2007/01/25 13:38:20 fabiankeil Exp $";
+const char loaders_rcs[] = "$Id: loaders.c,v 1.60 2007/03/20 15:16:34 fabiankeil Exp $";
 /*********************************************************************
  *
  * File        :  $Source: /cvsroot/ijbswa/current/loaders.c,v $
@@ -35,6 +35,11 @@ const char loaders_rcs[] = "$Id: loaders.c,v 1.59 2007/01/25 13:38:20 fabiankeil
  *
  * Revisions   :
  *    $Log: loaders.c,v $
+ *    Revision 1.60  2007/03/20 15:16:34  fabiankeil
+ *    Use dedicated header filter actions instead of abusing "filter".
+ *    Replace "filter-client-headers" and "filter-server-headers"
+ *    with "client-header-filter" and "server-header-filter".
+ *
  *    Revision 1.59  2007/01/25 13:38:20  fabiankeil
  *    Freez csp->error_message in sweep().
  *
@@ -379,6 +384,10 @@ static struct file_list *current_re_filterfile[MAX_AF_FILES]  = {
    NULL, NULL, NULL, NULL, NULL
 };
 
+/*
+ * Pseudo filter type for load_one_re_filterfile: the line starts no new filter block.
+ */
+#define NO_NEW_FILTER (-1)
 
 
 /*********************************************************************
@@ -486,6 +495,7 @@ void sweep(void)
          free_http_request(csp->http);
 
          destroy_list(csp->headers);
+         destroy_list(csp->tags);
          destroy_list(csp->cookie_list);
 
          free_current_action(csp->action);
@@ -1423,7 +1433,7 @@ int load_one_re_filterfile(struct client_state *csp, int fileid)
     */
    while (read_config_line(buf, sizeof(buf), fp, &linenum) != NULL)
    {
-      int new_filter = 0;
+      int new_filter = NO_NEW_FILTER;
 
       if (strncmp(buf, "FILTER:", 7) == 0)
       {
@@ -1437,12 +1447,20 @@ int load_one_re_filterfile(struct client_state *csp, int fileid)
       {
          new_filter = FT_CLIENT_HEADER_FILTER;
       }
+      else if (strncmp(buf, "CLIENT-HEADER-TAGGER:", 21) == 0)
+      {
+         new_filter = FT_CLIENT_HEADER_TAGGER;
+      }
+      else if (strncmp(buf, "SERVER-HEADER-TAGGER:", 21) == 0)
+      {
+         new_filter = FT_SERVER_HEADER_TAGGER;
+      }
 
       /*
        * If this is the head of a new filter block, make it a
        * re_filterfile spec of its own and chain it to the list:
        */
-      if (new_filter != 0)
+      if (new_filter != NO_NEW_FILTER)
       {
          new_bl = (struct re_filterfile_spec  *)zalloc(sizeof(*bl));
          if (new_bl == NULL)
index cf2eea8..14ce3b6 100644 (file)
--- a/parsers.c
+++ b/parsers.c
@@ -1,4 +1,4 @@
-const char parsers_rcs[] = "$Id: parsers.c,v 1.95 2007/03/25 14:26:40 fabiankeil Exp $";
+const char parsers_rcs[] = "$Id: parsers.c,v 1.96 2007/04/12 12:53:58 fabiankeil Exp $";
 /*********************************************************************
  *
  * File        :  $Source: /cvsroot/ijbswa/current/parsers.c,v $
@@ -44,6 +44,11 @@ const char parsers_rcs[] = "$Id: parsers.c,v 1.95 2007/03/25 14:26:40 fabiankeil
  *
  * Revisions   :
  *    $Log: parsers.c,v $
+ *    Revision 1.96  2007/04/12 12:53:58  fabiankeil
+ *    Log a warning if the content is compressed, filtering is
+ *    enabled and Privoxy was compiled without zlib support.
+ *    Closes FR#1673938.
+ *
  *    Revision 1.95  2007/03/25 14:26:40  fabiankeil
  *    - Fix warnings when compiled with glibc.
  *    - Don't use crumble() for cookie crunching.
@@ -688,6 +693,7 @@ const char parsers_rcs[] = "$Id: parsers.c,v 1.95 2007/03/25 14:26:40 fabiankeil
 #include "jbsockets.h"
 #include "miscutil.h"
 #include "list.h"
+#include "actions.h"
 
 #ifndef HAVE_STRPTIME
 #include "strptime.h"
@@ -697,7 +703,7 @@ const char parsers_h_rcs[] = PARSERS_H_VERSION;
 
 /* Fix a problem with Solaris.  There should be no effect on other
  * platforms.
- * Solaris's isspace() is a macro which uses it's argument directly
+ * Solaris's isspace() is a macro which uses its argument directly
  * as an array index.  Therefore we need to make sure that high-bit
  * characters generate +ve values, and ideally we also want to make
  * the argument match the declared parameter type of "int".
@@ -708,6 +714,8 @@ const char parsers_h_rcs[] = PARSERS_H_VERSION;
 #define ijb_isupper(__X) isupper((int)(unsigned char)(__X))
 #define ijb_tolower(__X) tolower((int)(unsigned char)(__X))
 
+jb_err header_tagger(struct client_state *csp, char *header);
+jb_err scan_headers(struct client_state *csp);
 
 const struct parsers client_patterns[] = {
    { "referer:",                  8,   client_referrer },
@@ -1365,6 +1373,40 @@ char *get_header_value(const struct list *header_list, const char *header_name)
 
 }
 
+
+/*********************************************************************
+ *
+ * Function    :  scan_headers
+ *
+ * Description :  Runs header_tagger() on each header, then update_action_bits().
+ *
+ * Parameters  :
+ *          1  :  csp = Current client state (buffers, headers, etc...)
+ *
+ * Returns     :  Result of the last header_tagger() call, JB_ERR_OK if none.
+ *
+ *********************************************************************/
+jb_err scan_headers(struct client_state *csp)
+{
+   jb_err result = JB_ERR_OK;
+   struct list_entry *entry;
+
+   log_error(LOG_LEVEL_HEADER, "scanning headers for: %s", csp->http->url);
+   for (entry = csp->headers->first; (NULL != entry) && (JB_ERR_OK == result); entry = entry->next)
+   {
+      /* Only headers that were not crunch()ed in a previous run are scanned. */
+      if (NULL != entry->str)
+      {
+         log_error(LOG_LEVEL_HEADER, "scan: %s", entry->str);
+         result = header_tagger(csp, entry->str);
+      }
+   }
+
+   update_action_bits(csp);
+   return result;
+}
+
+
 /*********************************************************************
  *
  * Function    :  sed
@@ -1405,7 +1447,8 @@ char *sed(const struct parsers pats[],
 
    if (first_run) /* Parse and print */
    {
-      log_error(LOG_LEVEL_HEADER, "scanning headers for: %s", csp->http->url);
+      scan_headers(csp);
+
       for (v = pats; (err == JB_ERR_OK) && (v->str != NULL) ; v++)
       {
          for (p = csp->headers->first; (err == JB_ERR_OK) && (p != NULL) ; p = p->next)
@@ -1413,8 +1456,6 @@ char *sed(const struct parsers pats[],
             /* Header crunch()ed in previous run? -> ignore */
             if (p->str == NULL) continue;
 
-            if (v == pats) log_error(LOG_LEVEL_HEADER, "scan: %s", p->str);
-
             /* Does the current parser handle this header? */
             if ((strncmpic(p->str, v->str, v->len) == 0) || (v->len == CHECK_EVERY_HEADER_REMAINING))
             {
@@ -1463,6 +1504,171 @@ char *sed(const struct parsers pats[],
 }
 
 
+
+/*********************************************************************
+ *
+ * Function    :  header_tagger
+ *
+ * Description :  Executes all text substitutions from applying
+ *                tag actions and saves the result as tag in csp->tags.
+ *
+ *                XXX: Shares enough code with filter_header() and
+ *                pcrs_filter_response() to warrant some helper functions.
+ *
+ * Parameters  :
+ *          1  :  csp = Current client state (buffers, headers, etc...)
+ *          2  :  header = Header that is used as tagger input
+ *
+ * Returns     :  JB_ERR_OK, always (problems are only logged)
+ *
+ *********************************************************************/
+jb_err header_tagger(struct client_state *csp, char *header)
+{
+   int wanted_filter_type;
+   int multi_action_index;
+   int i;
+   pcrs_job *job;
+
+   struct file_list *fl;
+   struct re_filterfile_spec *b;
+   struct list_entry *tag_name;
+
+   int found_filters = 0;
+   const size_t header_length = strlen(header);
+
+   if (csp->flags & CSP_FLAG_CLIENT_HEADER_PARSING_DONE)
+   {
+      wanted_filter_type = FT_SERVER_HEADER_TAGGER;
+      multi_action_index = ACTION_MULTI_SERVER_HEADER_TAGGER;
+   }
+   else
+   {
+      wanted_filter_type = FT_CLIENT_HEADER_TAGGER;
+      multi_action_index = ACTION_MULTI_CLIENT_HEADER_TAGGER;
+   }
+
+   /* Check if there are any filters */
+   for (i = 0; i < MAX_AF_FILES; i++)
+   {
+      fl = csp->rlist[i];
+      if (NULL != fl)
+      {
+         if (NULL != fl->f)
+         {
+            found_filters = 1;
+            break;
+         }
+      }
+   }
+
+   if (0 == found_filters)
+   {
+      log_error(LOG_LEVEL_ERROR, "Unable to get current state of regex tagging.");
+      return(JB_ERR_OK);
+   }
+
+   for (i = 0; i < MAX_AF_FILES; i++)
+   {
+      fl = csp->rlist[i];
+      if ((NULL == fl) || (NULL == fl->f))
+      {
+         /*
+          * Either there are no filter files
+          * left, or this filter file just
+          * contains no valid filters.
+          *
+          * Continue to be sure we don't miss
+          * valid filter files that are chained
+          * after empty or invalid ones.
+          */
+         continue;
+      }
+
+      /* For all filters, */
+      for (b = fl->f; b; b = b->next)
+      {
+         if (b->type != wanted_filter_type)
+         {
+            /* skip the ones we don't care about, */
+            continue;
+         }
+         /* leaving only taggers that could apply, of which we use the ones, */
+         for (tag_name = csp->action->multi[multi_action_index]->first;
+              NULL != tag_name; tag_name = tag_name->next)
+         {
+            /* that do apply, and */
+            if (strcmp(b->name, tag_name->str) == 0)
+            {
+               char *modified_tag = NULL;
+               char *tag = header;
+               size_t size = header_length;
+
+               if (NULL == b->joblist)
+               {
+                  log_error(LOG_LEVEL_RE_FILTER,
+                     "Tagger %s has empty joblist. Nothing to do.", b->name);
+                  continue;
+               }
+
+               /* execute their pcrs_joblist on the header. */
+               for (job = b->joblist; NULL != job; job = job->next)
+               {
+                  const int hits = pcrs_execute(job, tag, size, &modified_tag, &size);
+
+                  if (0 < hits)
+                  {
+                     /* Success, continue with the modified version. */
+                     if (tag != header)
+                     {
+                        freez(tag);
+                     }
+                     tag = modified_tag;
+                  }
+                  else
+                  {
+                     /* Tagger doesn't match */
+                     if (0 > hits)
+                     {
+                        /* Regex failure, log it but continue anyway. */
+                        log_error(LOG_LEVEL_ERROR,
+                           "Problems with tagger \'%s\' and header \'%s\': %s",
+                           b->name, header, pcrs_strerror(hits));
+                     }
+                     freez(modified_tag);
+                  }
+               }
+
+               /* If this tagger matched */
+               if (tag != header)
+               {
+                  /* and there is something left to save, */
+                  if (0 < size)
+                  {
+                     /* enlist a unique version of it as tag. */
+                     if (JB_ERR_OK != enlist_unique(csp->tags, tag, 0))
+                     {
+                        log_error(LOG_LEVEL_ERROR,
+                           "Insufficient memory to add tag \'%s\', "
+                           "based on tagger \'%s\' and header \'%s\'",
+                           tag, b->name, header);
+                     }
+                     else
+                     {
+                        log_error(LOG_LEVEL_HEADER,
+                           "Adding tag \'%s\' created by header tagger \'%s\'",
+                           tag, b->name);
+                     }
+                  }
+                  freez(tag);
+               }
+            } /* if the tagger applies */
+         } /* for every tagger that could apply */
+      } /* for all filters */
+   } /* for all filter files */
+
+   return JB_ERR_OK;
+}
+
 /* here begins the family of parser functions that reformat header lines */
 
 /*********************************************************************
@@ -3415,9 +3621,6 @@ jb_err connection_close_adder(struct client_state *csp)
  *********************************************************************/
 jb_err server_http(struct client_state *csp, char **header)
 {
-   /* Signal that were now parsing server headers. */
-   csp->flags |= CSP_FLAG_CLIENT_HEADER_PARSING_DONE;
-
    sscanf(*header, "HTTP/%*d.%*d %d", &(csp->http->status));
    if (csp->http->status == 206)
    {
index d3cde77..f52b5d6 100644 (file)
--- a/project.h
+++ b/project.h
@@ -1,7 +1,7 @@
 #ifndef PROJECT_H_INCLUDED
 #define PROJECT_H_INCLUDED
 /** Version string. */
-#define PROJECT_H_VERSION "$Id: project.h,v 1.92 2007/03/17 15:20:05 fabiankeil Exp $"
+#define PROJECT_H_VERSION "$Id: project.h,v 1.93 2007/03/20 15:16:34 fabiankeil Exp $"
 /*********************************************************************
  *
  * File        :  $Source: /cvsroot/ijbswa/current/project.h,v $
  *
  * Revisions   :
  *    $Log: project.h,v $
+ *    Revision 1.93  2007/03/20 15:16:34  fabiankeil
+ *    Use dedicated header filter actions instead of abusing "filter".
+ *    Replace "filter-client-headers" and "filter-server-headers"
+ *    with "client-header-filter" and "server-header-filter".
+ *
  *    Revision 1.92  2007/03/17 15:20:05  fabiankeil
  *    New config option: enforce-blocks.
  *
@@ -857,7 +862,7 @@ struct http_response
 };
 
 /**
- * A URL pattern.
+ * A URL or a tag pattern.
  */
 struct url_spec
 {
@@ -875,12 +880,13 @@ struct url_spec
    char *path;         /**< The source for the regex.                         */
    size_t pathlen;     /**< ==strlen(path).  Needed for prefix matching.  FIXME: Now obsolete?     */
    regex_t *preg;      /**< Regex for matching path part                      */
+   regex_t *tag_regex; /**< Regex for matching tags                           */
 };
 
 /**
  * If you declare a static url_spec, this is the value to initialize it to zero.
  */
-#define URL_SPEC_INITIALIZER { NULL, NULL, NULL, 0, 0, 0, NULL, 0, NULL }
+#define URL_SPEC_INITIALIZER { NULL, NULL, NULL, 0, 0, 0, NULL, 0, NULL, NULL }
 
 /**
  * Constant for host part matching in URLs.  If set, indicates that the start of
@@ -1064,8 +1070,12 @@ struct iob
 #define ACTION_MULTI_SERVER_HEADER_FILTER    3
 /** Index into current_action_spec::multi[] for client-header filters to apply. */
 #define ACTION_MULTI_CLIENT_HEADER_FILTER    4
+/** Index into current_action_spec::multi[] for client-header taggers to apply. */
+#define ACTION_MULTI_CLIENT_HEADER_TAGGER    5
+/** Index into current_action_spec::multi[] for server-header taggers to apply. */
+#define ACTION_MULTI_SERVER_HEADER_TAGGER    6
 /** Number of multi-string actions. */
-#define ACTION_MULTI_COUNT                   5
+#define ACTION_MULTI_COUNT                   7
 
 
 /**
@@ -1263,6 +1273,9 @@ struct client_state
    /** List of all headers for this request */
    struct list headers[1];
 
+   /** List of all tags that apply to this request */
+   struct list tags[1];
+
    /** List of all cookies for this request */
    struct list cookie_list[1];
 
@@ -1452,9 +1465,13 @@ struct forward_spec
 #define FORWARD_SPEC_INITIALIZER { { URL_SPEC_INITIALIZER }, 0, NULL, 0, NULL, 0, NULL }
 
 /* Supported filter types */
-#define FT_CONTENT_FILTER       1
-#define FT_CLIENT_HEADER_FILTER 2
-#define FT_SERVER_HEADER_FILTER 3
+#define FT_CONTENT_FILTER       0
+#define FT_CLIENT_HEADER_FILTER 1
+#define FT_SERVER_HEADER_FILTER 2
+#define FT_CLIENT_HEADER_TAGGER 3
+#define FT_SERVER_HEADER_TAGGER 4
+
+#define MAX_FILTER_TYPES        5
 
 /**
  * This struct represents one filter (one block) from
index 441596f..98847a4 100644 (file)
 #
 # Revisions   :
 #    $Log: edit-actions-for-url,v $
+#    Revision 1.43  2007/03/29 11:40:34  fabiankeil
+#    Divide @filter-params@ into @client-header-filter-params@
+#    @content-filter-params@ and @server-header-filter-params@.
+#
 #    Revision 1.42  2007/03/20 15:40:00  fabiankeil
 #    Adjust to new world order with dedicated header-filter actions.
 #
@@ -515,6 +519,7 @@ function show_send_wafer_opts(tf)
     </tr>
 
 @client-header-filter-params@
+@client-header-tagger-params@
 
    <tr class="bg1" align="left" valign="top">
       <td class="en1" align="center" valign="middle"><input type="radio"
@@ -1200,6 +1205,7 @@ function show_send_wafer_opts(tf)
     </tr>
 
 @server-header-filter-params@
+@server-header-tagger-params@
 
     <tr class="bg1" align="left" valign="top">
       <td class="en1" align="center" valign="middle"><input type="radio"
index 8bb8e26..f3eaa41 100644 (file)
@@ -1,4 +1,4 @@
-const char urlmatch_rcs[] = "$Id: urlmatch.c,v 1.15 2007/01/28 16:11:23 fabiankeil Exp $";
+const char urlmatch_rcs[] = "$Id: urlmatch.c,v 1.16 2007/02/13 13:59:24 fabiankeil Exp $";
 /*********************************************************************
  *
  * File        :  $Source: /cvsroot/ijbswa/current/urlmatch.c,v $
@@ -33,6 +33,9 @@ const char urlmatch_rcs[] = "$Id: urlmatch.c,v 1.15 2007/01/28 16:11:23 fabianke
  *
  * Revisions   :
  *    $Log: urlmatch.c,v $
+ *    Revision 1.16  2007/02/13 13:59:24  fabiankeil
+ *    Remove redundant log message.
+ *
  *    Revision 1.15  2007/01/28 16:11:23  fabiankeil
  *    Accept WebDAV methods for subversion
  *    in parse_http_request(). Closes FR 1581425.
@@ -741,6 +744,9 @@ static int domain_match(const struct url_spec *pattern, const struct http_reques
 jb_err create_url_spec(struct url_spec * url, const char * buf)
 {
    char *p;
+   int errcode;
+   size_t errlen;
+   char rebuf[BUFFER_SIZE];
 
    assert(url);
    assert(buf);
@@ -758,6 +764,38 @@ jb_err create_url_spec(struct url_spec * url, const char * buf)
       return JB_ERR_MEMORY;
    }
 
+   /* Is it a tag pattern? If so, only the tag regex gets compiled. */
+   if (0 == strncmpic("TAG:", url->spec, 4))
+   {
+      if (NULL == (url->tag_regex = zalloc(sizeof(*url->tag_regex))))
+      {
+         freez(url->spec);
+         return JB_ERR_MEMORY;
+      }
+
+      /* buf + 4 to skip "TAG:" */
+      errcode = regcomp(url->tag_regex, buf + 4, (REG_EXTENDED|REG_NOSUB|REG_ICASE));
+      if (errcode)
+      {
+         /* Pass the regex that failed; url->preg is only set up for URL patterns. */
+         errlen = regerror(errcode, url->tag_regex, rebuf, sizeof(rebuf));
+         if (errlen > (sizeof(rebuf) - 1))
+         {
+            errlen = sizeof(rebuf) - 1;
+         }
+         rebuf[errlen] = '\0';
+
+         log_error(LOG_LEVEL_ERROR, "error compiling %s: %s", url->spec, rebuf);
+         freez(url->spec);
+         regfree(url->tag_regex);
+         freez(url->tag_regex);
+
+         return JB_ERR_PARSE;
+      }
+      return JB_ERR_OK;
+   }
+
+   /* Only reached for URL patterns */
    if ((p = strchr(buf, '/')) != NULL)
    {
       if (NULL == (url->path = strdup(p)))
@@ -775,9 +813,6 @@ jb_err create_url_spec(struct url_spec * url, const char * buf)
    }
    if (url->path)
    {
-      int errcode;
-      char rebuf[BUFFER_SIZE];
-
       if (NULL == (url->preg = zalloc(sizeof(*url->preg))))
       {
          freez(url->spec);
@@ -791,8 +826,7 @@ jb_err create_url_spec(struct url_spec * url, const char * buf)
             (REG_EXTENDED|REG_NOSUB|REG_ICASE));
       if (errcode)
       {
-         size_t errlen = regerror(errcode,
-            url->preg, rebuf, sizeof(rebuf));
+         errlen = regerror(errcode, url->preg, rebuf, sizeof(rebuf));
 
          if (errlen > (sizeof(rebuf) - (size_t)1))
          {
@@ -934,6 +968,11 @@ void free_url_spec(struct url_spec *url)
       regfree(url->preg);
       freez(url->preg);
    }
+   if (url->tag_regex)
+   {
+      regfree(url->tag_regex);
+      freez(url->tag_regex);
+   }
 }
 
 
@@ -947,12 +986,18 @@ void free_url_spec(struct url_spec *url)
  *          1  :  pattern = a URL pattern
  *          2  :  url = URL to match
  *
- * Returns     :  0 iff the URL matches the pattern, else nonzero.
+ * Returns     :  Nonzero if the URL matches the pattern, else 0.
  *
  *********************************************************************/
 int url_match(const struct url_spec *pattern,
               const struct http_request *url)
 {
+   if (pattern->tag_regex != NULL)
+   {
+      /* It's a tag pattern and shouldn't be matched against URLs */
+      return 0;
+   } 
+
    return ((pattern->port == 0) || (pattern->port == url->port))
        && ((pattern->dbuffer == NULL) || (domain_match(pattern, url) == 0))
        && ((pattern->path == NULL) ||