Block farm.plista.com/widgetdata.php

[privoxy.git] / filters.c
diff --git a/filters.c b/filters.c

index 4de7c90..a94454c 100644 (file)
--- a/filters.c
+++ b/filters.c
@@ -1,19 +1,11 @@
-const char filters_rcs[] = "$Id: filters.c,v 1.152 2011/10/30 16:18:12 fabiankeil Exp $";
+const char filters_rcs[] = "$Id: filters.c,v 1.171 2012/03/18 13:47:33 fabiankeil Exp $";
  /*********************************************************************
   *
   * File        :  $Source: /cvsroot/ijbswa/current/filters.c,v $
   *
   * Purpose     :  Declares functions to parse/crunch headers and pages.
- *                Functions declared include:
- *                   `acl_addr', `add_stats', `block_acl', `block_imageurl',
- *                   `block_url', `url_actions', `domain_split',
- *                   `filter_popups', `forward_url', 'redirect_url',
- *                   `ij_untrusted_url', `intercept_url', `pcrs_filter_respose',
- *                   `ijb_send_banner', `trust_url', `gif_deanimate_response',
- *                   `execute_single_pcrs_command', `rewrite_url',
- *                   `get_last_url'
- *
- * Copyright   :  Written by and Copyright (C) 2001-2010 the
+ *
+ * Copyright   :  Written by and Copyright (C) 2001-2011 the
   *                Privoxy team. http://www.privoxy.org/
   *
   *                Based on the Internet Junkbuster originally written
@@ -78,26 +70,12 @@ const char filters_rcs[] = "$Id: filters.c,v 1.152 2011/10/30 16:18:12 fabiankei
  #include "urlmatch.h"
  #include "loaders.h"
  
-#ifdef HAVE_STRTOK
-/* Only used for locks */
-#include "jcc.h"
-#endif /* def HAVE_STRTOK */
-
  #ifdef _WIN32
  #include "win32.h"
  #endif
  
  const char filters_h_rcs[] = FILTERS_H_VERSION;
  
-/* Fix a problem with Solaris.  There should be no effect on other
- * platforms.
- * Solaris's isspace() is a macro which uses it's argument directly
- * as an array index.  Therefore we need to make sure that high-bit
- * characters generate +ve values, and ideally we also want to make
- * the argument match the declared parameter type of "int".
- */
-#define ijb_isdigit(__X) isdigit((int)(unsigned char)(__X))
-
  typedef char *(*filter_function_ptr)();
  static filter_function_ptr get_filter_function(const struct client_state *csp);
  static jb_err remove_chunked_transfer_coding(char *buffer, size_t *size);
@@ -198,10 +176,8 @@ static int match_sockaddr(const struct sockaddr_storage *network,
     if (network->ss_family != netmask->ss_family)
     {
        /* This should never happen */
-      log_error(LOG_LEVEL_ERROR,
-         "Internal error at %s:%llu: network and netmask differ in family",
-         __FILE__, __LINE__);
-      return 0;
+      assert(network->ss_family == netmask->ss_family);
+      log_error(LOG_LEVEL_FATAL, "Network and netmask differ in family.");
     }
  
     sockaddr_storage_to_ip(network, &network_addr, &addr_len, &network_port);
@@ -384,7 +360,7 @@ int acl_addr(const char *aspec, struct access_control_addr *aca)
     if ((p = strchr(acl_spec, '/')) != NULL)
     {
        *p++ = '\0';
-      if (ijb_isdigit(*p) == 0)
+      if (privoxy_isdigit(*p) == 0)
        {
           freez(acl_spec);
           return(-1);
@@ -601,7 +577,7 @@ struct http_response *block_url(struct client_state *csp)
        /* determine HOW images should be blocked */
        p = csp->action->string[ACTION_STRING_IMAGE_BLOCKER];
  
-      if(csp->action->flags & ACTION_HANDLE_AS_EMPTY_DOCUMENT)
+      if (csp->action->flags & ACTION_HANDLE_AS_EMPTY_DOCUMENT)
        {
           log_error(LOG_LEVEL_ERROR, "handle-as-empty-document overruled by handle-as-image.");
        }
@@ -670,7 +646,7 @@ struct http_response *block_url(struct client_state *csp)
     }
     else
  #endif /* def FEATURE_IMAGE_BLOCKING */
-   if(csp->action->flags & ACTION_HANDLE_AS_EMPTY_DOCUMENT)
+   if (csp->action->flags & ACTION_HANDLE_AS_EMPTY_DOCUMENT)
     {
       /*
        *  Send empty document.
@@ -1110,73 +1086,79 @@ char *get_last_url(char *subject, const char *redirect_mode)
        return NULL;
     }
  
-   if (0 == strcmpic(redirect_mode, "check-decoded-url"))
+   if (0 == strcmpic(redirect_mode, "check-decoded-url") && strchr(subject, '%'))
     {  
        log_error(LOG_LEVEL_REDIRECTS,
           "Checking \"%s\" for encoded redirects.", subject);
  
-#if defined(MUTEX_LOCKS_AVAILABLE) && defined(HAVE_STRTOK)
        /*
         * Check each parameter in the URL separately.
         * Sectionize the URL at "?" and "&",
-       * then URL-decode each component,
+       * go backwards through the segments, URL-decode them
         * and look for a URL in the decoded result.
-       * Keep the last one we spot.
+       * Stop the search after the first match.
         */
-      char *found = NULL;
+      char *url_segment = NULL;
+      /*
+       * XXX: ssplit() ignores empty fields, so at most (n + 1) / 2 segments
+       *      exist for length n; n / 2 + 1 covers odd n and is never zero.
+       */
+      size_t max_segments = strlen(subject) / 2 + 1;
+      char **url_segments = malloc(max_segments * sizeof(char *));
+      int segments;
+
+      if (NULL == url_segments)
+      {
+         log_error(LOG_LEVEL_ERROR,
+            "Out of memory while decoding URL: %s", subject);
+         freez(subject);
+         return NULL;
+      }
  
-      privoxy_mutex_lock(&strtok_mutex);
-      char *token = strtok(subject, "?&");
-      while (token)
+      segments = ssplit(subject, "?&", url_segments, max_segments);
+
+      while (segments-- > 0)
        {
-         char *dtoken = url_decode(token);
+         char *dtoken = url_decode(url_segments[segments]);
           if (NULL == dtoken)
           {
-            log_error(LOG_LEVEL_ERROR, "Unable to decode \"%s\".", token);
+            log_error(LOG_LEVEL_ERROR, "Unable to decode \"%s\".", url_segments[segments]);
              continue;
           }
-         char *h1 = strstr(dtoken, "http://");
-         char *h2 = strstr(dtoken, "https://");
-         char *h = (h1 && h2
-                    ? (h1 < h2 ? h1 : h2)
-                    : (h1 ? h1 : h2));
-         if (h)
+         url_segment = strstr(dtoken, "http://");
+         if (NULL == url_segment)
           {
-            freez(found);
-            found = strdup(h);
-            if (found == NULL)
+            url_segment = strstr(dtoken, "https://");
+         }
+         if (NULL != url_segment)
+         {
+            url_segment = strdup(url_segment);
+            freez(dtoken);
+            if (url_segment == NULL)
              {
                 log_error(LOG_LEVEL_ERROR,
                    "Out of memory while searching for redirects.");
-               privoxy_mutex_unlock(&strtok_mutex);
-               return NULL;
+               break; /* shared cleanup below frees subject, url_segments */
              }
+            break;
           }
           freez(dtoken);
-         token = strtok(NULL, "?&");
        }
-      privoxy_mutex_unlock(&strtok_mutex);
        freez(subject);
+      freez(url_segments);
  
-      return found;
-#else
-      new_url = url_decode(subject);
-      if (new_url != NULL)
+      if (url_segment == NULL)
        {
-         freez(subject);
-         subject = new_url;
+         return NULL;
        }
-      else
-      {
-         log_error(LOG_LEVEL_ERROR, "Unable to decode \"%s\".", subject);
-      }
-#endif /* defined(MUTEX_LOCKS_AVAILABLE) && defined(HAVE_STRTOK) */
+      subject = url_segment;
+   }
+   else
+   {
+      /* Look for a URL inside this one, without decoding anything. */
+      log_error(LOG_LEVEL_REDIRECTS,
+         "Checking \"%s\" for unencoded redirects.", subject);
     }
-
-   /* Else, just look for a URL inside this one, without decoding anything. */
-
-   log_error(LOG_LEVEL_REDIRECTS,
-      "Checking \"%s\" for unencoded redirects.", subject);
  
     /*
      * Find the last URL encoded in the request
@@ -1308,6 +1290,21 @@ struct http_response *redirect_url(struct client_state *csp)
     /* Did any redirect action trigger? */
     if (new_url)
     {
+      if (url_requires_percent_encoding(new_url))
+      {
+         char *encoded_url;
+         log_error(LOG_LEVEL_REDIRECTS, "Percent-encoding redirect URL: %N",
+            strlen(new_url), new_url);
+         encoded_url = percent_encode_url(new_url);
+         freez(new_url);
+         if (encoded_url == NULL)
+         {
+            return cgi_error_memory();
+         }
+         new_url = encoded_url;
+         assert(FALSE == url_requires_percent_encoding(new_url));
+      }
+
        if (0 == strcmpic(new_url, csp->http->url))
        {
           log_error(LOG_LEVEL_ERROR,
@@ -1325,8 +1322,8 @@ struct http_response *redirect_url(struct client_state *csp)
              return cgi_error_memory();
           }
  
-         if ( enlist_unique_header(rsp->headers, "Location", new_url)
-           || (NULL == (rsp->status = strdup("302 Local Redirect from Privoxy"))) )
+         if (enlist_unique_header(rsp->headers, "Location", new_url)
+           || (NULL == (rsp->status = strdup("302 Local Redirect from Privoxy"))))
           {
              freez(new_url);
              free_http_response(rsp);
@@ -1839,19 +1836,15 @@ static jb_err remove_chunked_transfer_coding(char *buffer, size_t *size)
           return JB_ERR_PARSE;
        }
  
-      if ((newsize += chunksize) >= *size)
+      if (chunksize >= *size - newsize)
        {
-         /*
-          * XXX: The message is a bit confusing. Isn't the real problem that
-          *      the specified chunk size is greater than the number of bytes
-          *      left in the buffer? This probably means the connection got
-          *      closed prematurely. To be investigated after 3.0.17 is out.
-          */
           log_error(LOG_LEVEL_ERROR,
-            "Chunk size %d exceeds buffer size %d in \"chunked\" transfer coding",
-            chunksize, *size);
+            "Chunk size %u exceeds buffered data left. "
+            "Already digested %u of %u buffered bytes.",
+            chunksize, (unsigned int)newsize, (unsigned int)*size);
           return JB_ERR_PARSE;
        }
+      newsize += chunksize;
        from_p += 2;
  
        memmove(to_p, from_p, (size_t) chunksize);
@@ -2135,7 +2128,7 @@ const static struct forward_spec *get_forward_override_settings(struct client_st
        return NULL;
     }
  
-   vec_count = ssplit(forward_settings, " \t", vec, SZ(vec), 1, 1);
+   vec_count = ssplit(forward_settings, " \t", vec, SZ(vec));
     if ((vec_count == 2) && !strcasecmp(vec[0], "forward"))
     {
        fwd->type = SOCKS_NONE;