Add support for external filters

[privoxy.git] / filters.c
diff --git a/filters.c b/filters.c

index 865c6d8..290940d 100644 (file)
--- a/filters.c
+++ b/filters.c
@@ -1,19 +1,11 @@
-const char filters_rcs[] = "$Id: filters.c,v 1.151 2011/10/30 16:17:57 fabiankeil Exp $";
+const char filters_rcs[] = "$Id: filters.c,v 1.180 2013/12/24 13:33:13 fabiankeil Exp $";
  /*********************************************************************
   *
   * File        :  $Source: /cvsroot/ijbswa/current/filters.c,v $
   *
   * Purpose     :  Declares functions to parse/crunch headers and pages.
- *                Functions declared include:
- *                   `acl_addr', `add_stats', `block_acl', `block_imageurl',
- *                   `block_url', `url_actions', `domain_split',
- *                   `filter_popups', `forward_url', 'redirect_url',
- *                   `ij_untrusted_url', `intercept_url', `pcrs_filter_respose',
- *                   `ijb_send_banner', `trust_url', `gif_deanimate_response',
- *                   `execute_single_pcrs_command', `rewrite_url',
- *                   `get_last_url'
- *
- * Copyright   :  Written by and Copyright (C) 2001-2010 the
+ *
+ * Copyright   :  Written by and Copyright (C) 2001-2011 the
   *                Privoxy team. http://www.privoxy.org/
   *
   *                Based on the Internet Junkbuster originally written
@@ -73,6 +65,7 @@ const char filters_rcs[] = "$Id: filters.c,v 1.151 2011/10/30 16:17:57 fabiankei
  #include "miscutil.h"
  #include "actions.h"
  #include "cgi.h"
+#include "jcc.h"
  #include "list.h"
  #include "deanimate.h"
  #include "urlmatch.h"
@@ -84,15 +77,6 @@ const char filters_rcs[] = "$Id: filters.c,v 1.151 2011/10/30 16:17:57 fabiankei
  
  const char filters_h_rcs[] = FILTERS_H_VERSION;
  
-/* Fix a problem with Solaris.  There should be no effect on other
- * platforms.
- * Solaris's isspace() is a macro which uses it's argument directly
- * as an array index.  Therefore we need to make sure that high-bit
- * characters generate +ve values, and ideally we also want to make
- * the argument match the declared parameter type of "int".
- */
-#define ijb_isdigit(__X) isdigit((int)(unsigned char)(__X))
-
  typedef char *(*filter_function_ptr)();
  static filter_function_ptr get_filter_function(const struct client_state *csp);
  static jb_err remove_chunked_transfer_coding(char *buffer, size_t *size);
@@ -193,10 +177,8 @@ static int match_sockaddr(const struct sockaddr_storage *network,
     if (network->ss_family != netmask->ss_family)
     {
        /* This should never happen */
-      log_error(LOG_LEVEL_ERROR,
-         "Internal error at %s:%llu: network and netmask differ in family",
-         __FILE__, __LINE__);
-      return 0;
+      assert(network->ss_family == netmask->ss_family);
+      log_error(LOG_LEVEL_FATAL, "Network and netmask differ in family.");
     }
  
     sockaddr_storage_to_ip(network, &network_addr, &addr_len, &network_port);
@@ -219,10 +201,6 @@ static int match_sockaddr(const struct sockaddr_storage *network,
        netmask_addr += 12;
        addr_len = 4;
     }
-   else if (network->ss_family != address->ss_family)
-   {
-      return 0;
-   }
  
     /* XXX: Port check is signaled in netmask */
     if (*netmask_port && *network_port != *address_port)
@@ -379,7 +357,7 @@ int acl_addr(const char *aspec, struct access_control_addr *aca)
     if ((p = strchr(acl_spec, '/')) != NULL)
     {
        *p++ = '\0';
-      if (ijb_isdigit(*p) == 0)
+      if (privoxy_isdigit(*p) == 0)
        {
           freez(acl_spec);
           return(-1);
@@ -596,7 +574,7 @@ struct http_response *block_url(struct client_state *csp)
        /* determine HOW images should be blocked */
        p = csp->action->string[ACTION_STRING_IMAGE_BLOCKER];
  
-      if(csp->action->flags & ACTION_HANDLE_AS_EMPTY_DOCUMENT)
+      if (csp->action->flags & ACTION_HANDLE_AS_EMPTY_DOCUMENT)
        {
           log_error(LOG_LEVEL_ERROR, "handle-as-empty-document overruled by handle-as-image.");
        }
@@ -665,7 +643,7 @@ struct http_response *block_url(struct client_state *csp)
     }
     else
  #endif /* def FEATURE_IMAGE_BLOCKING */
-   if(csp->action->flags & ACTION_HANDLE_AS_EMPTY_DOCUMENT)
+   if (csp->action->flags & ACTION_HANDLE_AS_EMPTY_DOCUMENT)
     {
       /*
        *  Send empty document.
@@ -804,8 +782,8 @@ struct http_response *trust_url(struct client_state *csp)
     struct map * exports;
     char buf[BUFFER_SIZE];
     char *p;
-   struct url_spec **tl;
-   struct url_spec *t;
+   struct pattern_spec **tl;
+   struct pattern_spec *t;
     jb_err err;
  
     /*
@@ -1105,55 +1083,81 @@ char *get_last_url(char *subject, const char *redirect_mode)
        return NULL;
     }
  
-   if (0 == strcmpic(redirect_mode, "check-decoded-url"))
+   if (0 == strcmpic(redirect_mode, "check-decoded-url") && strchr(subject, '%'))
     {  
+      char *url_segment = NULL;
+      char **url_segments;
+      size_t max_segments;
+      int segments;
+
        log_error(LOG_LEVEL_REDIRECTS,
           "Checking \"%s\" for encoded redirects.", subject);
+
        /*
         * Check each parameter in the URL separately.
         * Sectionize the URL at "?" and "&",
-       * then URL-decode each component,
+       * go backwards through the segments, URL-decode them
         * and look for a URL in the decoded result.
-       * Keep the last one we spot.
+       * Stop the search after the first match.
+       *
+       * XXX: This estimate is guaranteed to be high enough as we
+       *      let ssplit() ignore empty fields, but also a bit wasteful.
         */
-      char *found = NULL;
-      char *token = strtok(subject, "?&");
-      while (token)
+      max_segments = strlen(subject) / 2;
+      url_segments = malloc(max_segments * sizeof(char *));
+
+      if (NULL == url_segments)
+      {
+         log_error(LOG_LEVEL_ERROR,
+            "Out of memory while decoding URL: %s", subject);
+         freez(subject);
+         return NULL;
+      }
+
+      segments = ssplit(subject, "?&", url_segments, max_segments);
+
+      while (segments-- > 0)
        {
-         char *dtoken = url_decode(token);
+         char *dtoken = url_decode(url_segments[segments]);
           if (NULL == dtoken)
           {
-            log_error(LOG_LEVEL_ERROR, "Unable to decode \"%s\".", token);
+            log_error(LOG_LEVEL_ERROR, "Unable to decode \"%s\".", url_segments[segments]);
              continue;
           }
-         char *h1 = strstr(dtoken, "http://");
-         char *h2 = strstr(dtoken, "https://");
-         char *h = (h1 && h2
-                    ? (h1 < h2 ? h1 : h2)
-                    : (h1 ? h1 : h2));
-         if (h)
+         url_segment = strstr(dtoken, "http://");
+         if (NULL == url_segment)
+         {
+            url_segment = strstr(dtoken, "https://");
+         }
+         if (NULL != url_segment)
           {
-            freez(found);
-            found = strdup(h);
-            if (found == NULL)
+            url_segment = strdup(url_segment);
+            freez(dtoken);
+            if (url_segment == NULL)
              {
                 log_error(LOG_LEVEL_ERROR,
                    "Out of memory while searching for redirects.");
                 return NULL;
              }
+            break;
           }
           freez(dtoken);
-         token = strtok(NULL, "?&");
        }
        freez(subject);
+      freez(url_segments);
  
-      return found;
+      if (url_segment == NULL)
+      {
+         return NULL;
+      }
+      subject = url_segment;
+   }
+   else
+   {
+      /* Look for a URL inside this one, without decoding anything. */
+      log_error(LOG_LEVEL_REDIRECTS,
+         "Checking \"%s\" for unencoded redirects.", subject);
     }
-
-   /* Else, just look for a URL inside this one, without decoding anything. */
-
-   log_error(LOG_LEVEL_REDIRECTS,
-      "Checking \"%s\" for unencoded redirects.", subject);
  
     /*
      * Find the last URL encoded in the request
@@ -1285,6 +1289,21 @@ struct http_response *redirect_url(struct client_state *csp)
     /* Did any redirect action trigger? */
     if (new_url)
     {
+      if (url_requires_percent_encoding(new_url))
+      {
+         char *encoded_url;
+         log_error(LOG_LEVEL_REDIRECTS, "Percent-encoding redirect URL: %N",
+            strlen(new_url), new_url);
+         encoded_url = percent_encode_url(new_url);
+         freez(new_url);
+         if (encoded_url == NULL)
+         {
+            return cgi_error_memory();
+         }
+         new_url = encoded_url;
+         assert(FALSE == url_requires_percent_encoding(new_url));
+      }
+
        if (0 == strcmpic(new_url, csp->http->url))
        {
           log_error(LOG_LEVEL_ERROR,
@@ -1302,8 +1321,8 @@ struct http_response *redirect_url(struct client_state *csp)
              return cgi_error_memory();
           }
  
-         if ( enlist_unique_header(rsp->headers, "Location", new_url)
-           || (NULL == (rsp->status = strdup("302 Local Redirect from Privoxy"))) )
+         if (enlist_unique_header(rsp->headers, "Location", new_url)
+           || (NULL == (rsp->status = strdup("302 Local Redirect from Privoxy"))))
           {
              freez(new_url);
              free_http_response(rsp);
@@ -1391,7 +1410,7 @@ int is_untrusted_url(const struct client_state *csp)
  {
     struct file_list *fl;
     struct block_spec *b;
-   struct url_spec **trusted_url;
+   struct pattern_spec **trusted_url;
     struct http_request rhttp[1];
     const char * referer;
     jb_err err;
@@ -1507,6 +1526,66 @@ int is_untrusted_url(const struct client_state *csp)
  #endif /* def FEATURE_TRUST */
  
  
+/*********************************************************************
+ *
+ * Function    :  get_filter
+ *
+ * Description :  Look up a filter with a given name and type in the
+ *                filter files referenced by csp->rlist. The files
+ *                are searched in index order.
+ *                Note that taggers are filters, too.
+ *
+ * Parameters  :
+ *          1  :  csp = Current client state (buffers, headers, etc...)
+ *          2  :  requested_name = Name of the filter to look up
+ *          3  :  requested_type = Type of the filter or tagger to look up
+ *
+ * Returns     :  A pointer to the first filter matching both name
+ *                and type, or NULL if no such filter was found.
+ *
+ *********************************************************************/
+struct re_filterfile_spec *get_filter(const struct client_state *csp,
+                                      const char *requested_name,
+                                      enum filter_type requested_type)
+{
+   int file_index;
+   struct re_filterfile_spec *candidate;
+   struct file_list *filter_file;
+
+   for (file_index = 0; file_index < MAX_AF_FILES; file_index++)
+   {
+      filter_file = csp->rlist[file_index];
+
+      /*
+       * Slots that are unused or whose filter file contains no
+       * valid filters are skipped instead of ending the search,
+       * as usable filter files may be chained after them.
+       */
+      if ((filter_file == NULL) || (filter_file->f == NULL))
+      {
+         continue;
+      }
+
+      /* Check the filters of this file in the order they are linked. */
+      for (candidate = filter_file->f; candidate != NULL;
+           candidate = candidate->next)
+      {
+         /* Filters of other types are of no interest to the caller. */
+         if ((candidate->type == requested_type)
+            && (strcmp(candidate->name, requested_name) == 0))
+         {
+            /* The first match wins, there's no need to look further. */
+            return candidate;
+         }
+      }
+   }
+
+   /* None of the filter files provides the requested filter. */
+   return NULL;
+
+}
+
+
  /*********************************************************************
   *
   * Function    :  pcrs_filter_response
@@ -1525,14 +1604,12 @@ int is_untrusted_url(const struct client_state *csp)
  static char *pcrs_filter_response(struct client_state *csp)
  {
     int hits = 0;
-   int i;
     size_t size, prev_size;
  
     char *old = NULL;
     char *new = NULL;
     pcrs_job *job;
  
-   struct file_list *fl;
     struct re_filterfile_spec *b;
     struct list_entry *filtername;
  
@@ -1554,108 +1631,87 @@ static char *pcrs_filter_response(struct client_state *csp)
     size = (size_t)(csp->iob->eod - csp->iob->cur);
     old = csp->iob->cur;
  
-   for (i = 0; i < MAX_AF_FILES; i++)
-   {
-     fl = csp->rlist[i];
-     if ((NULL == fl) || (NULL == fl->f))
-     {
-        /*
-         * Either there are no filter files
-         * left, or this filter file just
-         * contains no valid filters.
-         *
-         * Continue to be sure we don't miss
-         * valid filter files that are chained
-         * after empty or invalid ones.
-         */
-        continue;
-     }
     /*
      * For all applying +filter actions, look if a filter by that
      * name exists and if yes, execute it's pcrs_joblist on the
      * buffer.
      */
-   for (b = fl->f; b; b = b->next)
+   for (filtername = csp->action->multi[ACTION_MULTI_FILTER]->first;
+        filtername != NULL; filtername = filtername->next)
     {
-      if (b->type != FT_CONTENT_FILTER)
+      int current_hits = 0; /* Number of hits caused by this filter */
+      int job_number   = 0; /* Which job we're currently executing  */
+      int job_hits     = 0; /* How many hits the current job caused */
+      pcrs_job *joblist;
+
+      b = get_filter(csp, filtername->str, FT_CONTENT_FILTER);
+      if (b == NULL)
        {
-         /* Skip header filters */
           continue;
        }
  
-      for (filtername = csp->action->multi[ACTION_MULTI_FILTER]->first;
-           filtername ; filtername = filtername->next)
-      {
-         if (strcmp(b->name, filtername->str) == 0)
-         {
-            int current_hits = 0; /* Number of hits caused by this filter */
-            int job_number   = 0; /* Which job we're currently executing  */
-            int job_hits     = 0; /* How many hits the current job caused */
-            pcrs_job *joblist = b->joblist;
+      joblist = b->joblist;
  
-            if (b->dynamic) joblist = compile_dynamic_pcrs_job_list(csp, b);
+      if (b->dynamic) joblist = compile_dynamic_pcrs_job_list(csp, b);
  
-            if (NULL == joblist)
-            {
-               log_error(LOG_LEVEL_RE_FILTER, "Filter %s has empty joblist. Nothing to do.", b->name);
-               continue;
-            }
+      if (NULL == joblist)
+      {
+         log_error(LOG_LEVEL_RE_FILTER, "Filter %s has empty joblist. Nothing to do.", b->name);
+         continue;
+      }
  
-            prev_size = size;
-            /* Apply all jobs from the joblist */
-            for (job = joblist; NULL != job; job = job->next)
-            {
-               job_number++;
-               job_hits = pcrs_execute(job, old, size, &new, &size);
+      prev_size = size;
+      /* Apply all jobs from the joblist */
+      for (job = joblist; NULL != job; job = job->next)
+      {
+         job_number++;
+         job_hits = pcrs_execute(job, old, size, &new, &size);
  
-               if (job_hits >= 0)
-               {
-                  /*
-                   * That went well. Continue filtering
-                   * and use the result of this job as
-                   * input for the next one.
-                   */
-                  current_hits += job_hits;
-                  if (old != csp->iob->cur)
-                  {
-                     freez(old);
-                  }
-                  old = new;
-               }
-               else
-               {
-                  /*
-                   * This job caused an unexpected error. Inform the user
-                   * and skip the rest of the jobs in this filter. We could
-                   * continue with the next job, but usually the jobs
-                   * depend on each other or are similar enough to
-                   * fail for the same reason.
-                   *
-                   * At the moment our pcrs expects the error codes of pcre 3.4,
-                   * but newer pcre versions can return additional error codes.
-                   * As a result pcrs_strerror()'s error message might be
-                   * "Unknown error ...", therefore we print the numerical value
-                   * as well.
-                   *
-                   * XXX: Is this important enough for LOG_LEVEL_ERROR or
-                   * should we use LOG_LEVEL_RE_FILTER instead?
-                   */
-                  log_error(LOG_LEVEL_ERROR, "Skipped filter \'%s\' after job number %u: %s (%d)",
-                     b->name, job_number, pcrs_strerror(job_hits), job_hits);
-                  break;
-               }
+         if (job_hits >= 0)
+         {
+            /*
+             * That went well. Continue filtering
+             * and use the result of this job as
+             * input for the next one.
+             */
+            current_hits += job_hits;
+            if (old != csp->iob->cur)
+            {
+               freez(old);
              }
+            old = new;
+         }
+         else
+         {
+            /*
+             * This job caused an unexpected error. Inform the user
+             * and skip the rest of the jobs in this filter. We could
+             * continue with the next job, but usually the jobs
+             * depend on each other or are similar enough to
+             * fail for the same reason.
+             *
+             * At the moment our pcrs expects the error codes of pcre 3.4,
+             * but newer pcre versions can return additional error codes.
+             * As a result pcrs_strerror()'s error message might be
+             * "Unknown error ...", therefore we print the numerical value
+             * as well.
+             *
+             * XXX: Is this important enough for LOG_LEVEL_ERROR or
+             * should we use LOG_LEVEL_RE_FILTER instead?
+             */
+            log_error(LOG_LEVEL_ERROR, "Skipped filter \'%s\' after job number %u: %s (%d)",
+               b->name, job_number, pcrs_strerror(job_hits), job_hits);
+            break;
+         }
+      }
  
-            if (b->dynamic) pcrs_free_joblist(joblist);
+      if (b->dynamic) pcrs_free_joblist(joblist);
  
-            log_error(LOG_LEVEL_RE_FILTER,
-               "filtering %s%s (size %d) with \'%s\' produced %d hits (new size %d).",
-               csp->http->hostport, csp->http->path, prev_size, b->name, current_hits, size);
+      log_error(LOG_LEVEL_RE_FILTER,
+         "filtering %s%s (size %d) with \'%s\' produced %d hits (new size %d).",
+         csp->http->hostport, csp->http->path, prev_size, b->name, current_hits, size);
  
-            hits += current_hits;
-         }
-      }
-   }
+      hits += current_hits;
     }
  
     /*
@@ -1670,13 +1726,236 @@ static char *pcrs_filter_response(struct client_state *csp)
  
     csp->flags |= CSP_FLAG_MODIFIED;
     csp->content_length = size;
-   IOB_RESET(csp);
+   clear_iob(csp->iob);
  
     return(new);
  
  }
  
  
+#ifdef FEATURE_EXTERNAL_FILTERS
+/*********************************************************************
+ *
+ * Function    :  get_external_filter
+ *
+ * Description :  Lookup the code to execute for an external filter.
+ *                Masks the misuse of the re_filterfile_spec.
+ *
+ * Parameters  :
+ *          1  :  csp = Current client state (buffers, headers, etc...)
+ *          2  :  name = Name of the external filter to look up
+ *
+ * Returns     :  The string of the first entry in the filter's pattern
+ *                list. A missing filter is logged as fatal error.
+ *
+ *********************************************************************/
+static const char *get_external_filter(const struct client_state *csp,
+                                const char *name)
+{
+   struct re_filterfile_spec *spec;
+
+   spec = get_filter(csp, name, FT_EXTERNAL_CONTENT_FILTER);
+   if (NULL == spec)
+   {
+      /* NOTE(review): presumably LOG_LEVEL_FATAL doesn't return - confirm. */
+      log_error(LOG_LEVEL_FATAL,
+         "Didn't find stuff to execute for external filter: %s", name);
+   }
+
+   return spec->patterns->first->str;
+
+}
+
+
+/*********************************************************************
+ *
+ * Function    :  set_privoxy_variables
+ *
+ * Description :  Sets (and overwrites) PRIVOXY_* environment variables
+ *
+ * Parameters  :
+ *          1  :  csp = Current client state (buffers, headers, etc...)
+ *
+ * Returns     :  N/A (failures are logged, but not reported to the caller)
+ *
+ *********************************************************************/
+static void set_privoxy_variables(const struct client_state *csp)
+{
+   int idx;
+   struct {
+      const char *name;
+      const char *value;
+   } variables[] = {
+      { "PRIVOXY_URL",    csp->http->url   },
+      { "PRIVOXY_PATH",   csp->http->path  },
+      { "PRIVOXY_HOST",   csp->http->host  },
+      { "PRIVOXY_ORIGIN", csp->ip_addr_str },
+   };
+
+   for (idx = 0; idx < SZ(variables); idx++)
+   {
+      if (0 != setenv(variables[idx].name, variables[idx].value, 1))
+      {
+         log_error(LOG_LEVEL_ERROR, "Failed to set %s=%s: %E",
+            variables[idx].name, variables[idx].value);
+      }
+   }
+}
+
+
+/*********************************************************************
+ *
+ * Function    :  execute_external_filter
+ *
+ * Description :  Pipe content into external filter and return the output
+ *
+ * Parameters  :
+ *          1  :  csp = Current client state (buffers, headers, etc...)
+ *          2  :  name = Name of the external filter to execute
+ *          3  :  content = The original content to filter
+ *          4  :  size = In: the size of the content buffer. Out: the size
+ *                       of the filter output, if a buffer is returned.
+ *
+ * Returns     :  a pointer to the (newly allocated) filter output
+ *                or NULL if the filter could not be executed.
+ *
+ *********************************************************************/
+static char *execute_external_filter(const struct client_state *csp,
+   const char *name, char *content, size_t *size)
+{
+   char cmd[200];
+   char file_name[FILENAME_MAX];
+   FILE *fp;
+   char *filter_output;
+   int fd;
+   int ret;
+   size_t new_size;
+   const char *external_filter;
+
+   if (csp->config->temporary_directory == NULL)
+   {
+      log_error(LOG_LEVEL_ERROR,
+         "No temporary-directory configured. Can't execute filter: %s", name);
+      return NULL;
+   }
+
+   external_filter = get_external_filter(csp, name);
+
+   ret = snprintf(file_name, sizeof(file_name),
+      "%s/privoxy-XXXXXXXX", csp->config->temporary_directory);
+   if ((ret < 0) || ((size_t)ret >= sizeof(file_name)))
+   {
+      log_error(LOG_LEVEL_ERROR, "temporary-directory path too long");
+      return NULL;
+   }
+
+   fd = mkstemp(file_name);
+   if (fd == -1)
+   {
+      log_error(LOG_LEVEL_ERROR, "mkstemp() failed to create %s: %E", file_name);
+      return NULL;
+   }
+
+   fp = fdopen(fd, "w");
+   if (fp == NULL)
+   {
+      log_error(LOG_LEVEL_ERROR, "fdopen() failed: %E");
+      close(fd); /* No stream owns the descriptor yet, close it directly. */
+      unlink(file_name);
+      return NULL;
+   }
+
+   /*
+    * The size may be zero if a previous filter discarded everything.
+    * This isn't necessarily unintentional, so we just don't try
+    * to fwrite() nothing and let the user deal with the rest.
+    */
+   if ((*size != 0) && fwrite(content, *size, 1, fp) != 1)
+   {
+      log_error(LOG_LEVEL_ERROR, "fwrite(..., %u, 1, ..) failed: %E", (unsigned int)*size);
+      fclose(fp);
+      unlink(file_name);
+      return NULL;
+   }
+   fclose(fp);
+
+   ret = snprintf(cmd, sizeof(cmd), "%s < %s", external_filter, file_name);
+   if ((ret < 0) || ((size_t)ret >= sizeof(cmd)))
+   {
+      log_error(LOG_LEVEL_ERROR,
+         "temporary-directory or external filter path too long");
+      unlink(file_name);
+      return NULL;
+   }
+
+   log_error(LOG_LEVEL_RE_FILTER, "Executing '%s': %s", name, cmd);
+
+   /*
+    * The locking is necessary to prevent other threads
+    * from overwriting the environment variables before
+    * the popen fork. Afterwards this no longer matters.
+    */
+   privoxy_mutex_lock(&external_filter_mutex);
+   set_privoxy_variables(csp);
+   fp = popen(cmd, "r");
+   privoxy_mutex_unlock(&external_filter_mutex);
+   if (fp == NULL)
+   {
+      log_error(LOG_LEVEL_ERROR, "popen(\"%s\", \"r\") failed: %E", cmd);
+      unlink(file_name);
+      return NULL;
+   }
+
+   filter_output = malloc_or_die(*size);
+
+   new_size = 0;
+   while (!feof(fp) && !ferror(fp))
+   {
+      /* Could be bigger ... */
+      enum { READ_LENGTH = 2048 };
+
+      if (new_size + READ_LENGTH >= *size)
+      {
+         char *p;
+
+         /* Grow by at least READ_LENGTH so the fread() below always fits. */
+         /* Could be considered wasteful if the content is 'large'. */
+         *size += (*size >= READ_LENGTH) ? *size : READ_LENGTH;
+
+         p = realloc(filter_output, *size);
+         if (p == NULL)
+         {
+            log_error(LOG_LEVEL_ERROR, "Out of memory while reading "
+               "external filter output. Using what we got so far.");
+            break;
+         }
+         filter_output = p;
+      }
+      new_size += fread(&filter_output[new_size], 1, READ_LENGTH, fp);
+   }
+
+   ret = pclose(fp);
+   if (ret == -1)
+   {
+      log_error(LOG_LEVEL_ERROR, "Executing %s failed: %E", cmd);
+   }
+   else
+   {
+      log_error(LOG_LEVEL_RE_FILTER,
+         "Executing '%s' resulted in return value %d. "
+         "Read %u of up to %u bytes.", name, (ret >> 8),
+         (unsigned int)new_size, (unsigned int)*size);
+   }
+
+   unlink(file_name);
+   *size = new_size;
+
+   return filter_output;
+
+}
+#endif /* def FEATURE_EXTERNAL_FILTERS */
+
+
  /*********************************************************************
   *
   * Function    :  gif_deanimate_response
@@ -1743,7 +2022,8 @@ static char *gif_deanimate_response(struct client_state *csp)
   * Function    :  get_filter_function
   *
   * Description :  Decides which content filter function has
- *                to be applied (if any).
+ *                to be applied (if any). Only considers functions
+ *                for internal filters which are mutually-exclusive.
   *
   * Parameters  :
   *          1  :  csp = Current client state (buffers, headers, etc...)
@@ -1816,19 +2096,15 @@ static jb_err remove_chunked_transfer_coding(char *buffer, size_t *size)
           return JB_ERR_PARSE;
        }
  
-      if ((newsize += chunksize) >= *size)
+      if (chunksize >= *size - newsize)
        {
-         /*
-          * XXX: The message is a bit confusing. Isn't the real problem that
-          *      the specified chunk size is greater than the number of bytes
-          *      left in the buffer? This probably means the connection got
-          *      closed prematurely. To be investigated after 3.0.17 is out.
-          */
           log_error(LOG_LEVEL_ERROR,
-            "Chunk size %d exceeds buffer size %d in \"chunked\" transfer coding",
-            chunksize, *size);
+            "Chunk size %u exceeds buffered data left. "
+            "Already digested %u of %u buffered bytes.",
+            chunksize, (unsigned int)newsize, (unsigned int)*size);
           return JB_ERR_PARSE;
        }
+      newsize += chunksize;
        from_p += 2;
  
        memmove(to_p, from_p, (size_t) chunksize);
@@ -1942,6 +2218,7 @@ static jb_err prepare_for_filtering(struct client_state *csp)
   *********************************************************************/
  char *execute_content_filters(struct client_state *csp)
  {
+   char *content;
     filter_function_ptr content_filter;
  
     assert(content_filters_enabled(csp->action));
@@ -1972,8 +2249,32 @@ char *execute_content_filters(struct client_state *csp)
     }
  
     content_filter = get_filter_function(csp);
+   content = (content_filter != NULL) ? (*content_filter)(csp) : NULL;
+
+#ifdef FEATURE_EXTERNAL_FILTERS
+   if (!list_is_empty(csp->action->multi[ACTION_MULTI_EXTERNAL_FILTER]))
+   {
+      struct list_entry *filtername;
+      size_t size = (size_t)csp->content_length;
+
+      if (content == NULL)
+      {
+         content = csp->iob->cur;
+         size = (size_t)(csp->iob->eod - csp->iob->cur);
+      }
+
+      for (filtername = csp->action->multi[ACTION_MULTI_EXTERNAL_FILTER]->first;
+           filtername ; filtername = filtername->next)
+      {
+         content = execute_external_filter(csp, filtername->str, content, &size);
+      }
+      csp->flags |= CSP_FLAG_MODIFIED;
+      csp->content_length = size;
+   }
+#endif /* def FEATURE_EXTERNAL_FILTERS */
+
+   return content;
  
-   return ((*content_filter)(csp));
  }
  
  
@@ -2112,7 +2413,7 @@ const static struct forward_spec *get_forward_override_settings(struct client_st
        return NULL;
     }
  
-   vec_count = ssplit(forward_settings, " \t", vec, SZ(vec), 1, 1);
+   vec_count = ssplit(forward_settings, " \t", vec, SZ(vec));
     if ((vec_count == 2) && !strcasecmp(vec[0], "forward"))
     {
        fwd->type = SOCKS_NONE;
@@ -2140,6 +2441,11 @@ const static struct forward_spec *get_forward_override_settings(struct client_st
           fwd->type = SOCKS_5;
           socks_proxy = vec[1];
        }
+      else if (!strcasecmp(vec[0], "forward-socks5t"))
+      {
+         fwd->type = SOCKS_5T;
+         socks_proxy = vec[1];
+      }
  
        if (NULL != socks_proxy)
        {
@@ -2192,7 +2498,7 @@ const static struct forward_spec *get_forward_override_settings(struct client_st
  const struct forward_spec *forward_url(struct client_state *csp,
                                         const struct http_request *http)
  {
-   static const struct forward_spec fwd_default[1] = { FORWARD_SPEC_INITIALIZER };
+   static const struct forward_spec fwd_default[1]; /* Zero'ed due to being static. */
     struct forward_spec *fwd = csp->config->forward;
  
     if (csp->action->flags & ACTION_FORWARD_OVERRIDE)
@@ -2358,7 +2664,7 @@ int content_requires_filtering(struct client_state *csp)
        return TRUE;
     }
  
-   return FALSE;
+   return (!list_is_empty(csp->action->multi[ACTION_MULTI_EXTERNAL_FILTER]));
  
  }
  
@@ -2379,7 +2685,8 @@ int content_requires_filtering(struct client_state *csp)
  int content_filters_enabled(const struct current_action_spec *action)
  {
     return ((action->flags & ACTION_DEANIMATE) ||
-      !list_is_empty(action->multi[ACTION_MULTI_FILTER]));
+      !list_is_empty(action->multi[ACTION_MULTI_FILTER]) ||
+      !list_is_empty(action->multi[ACTION_MULTI_EXTERNAL_FILTER]));
  }