Fix suse-dist as described in BR#1654052.

[privoxy.git] / parsers.c
diff --git a/parsers.c b/parsers.c

index fd62f9d..f84cf89 100644 (file)
--- a/parsers.c
+++ b/parsers.c
@@ -1,4 +1,4 @@
-const char parsers_rcs[] = "$Id: parsers.c,v 1.81 2006/12/31 22:21:33 fabiankeil Exp $";
+const char parsers_rcs[] = "$Id: parsers.c,v 1.87 2007/01/31 16:21:38 fabiankeil Exp $";
  /*********************************************************************
   *
   * File        :  $Source: /cvsroot/ijbswa/current/parsers.c,v $
@@ -18,7 +18,7 @@ const char parsers_rcs[] = "$Id: parsers.c,v 1.81 2006/12/31 22:21:33 fabiankeil
   *                   `client_if_none_match', `get_destination_from_headers',
   *                   `parse_header_time' and `server_set_cookie'.
   *
- * Copyright   :  Written by and Copyright (C) 2001-2006 the SourceForge
+ * Copyright   :  Written by and Copyright (C) 2001-2007 the SourceForge
   *                Privoxy team. http://www.privoxy.org/
   *
   *                Based on the Internet Junkbuster originally written
@@ -45,6 +45,42 @@ const char parsers_rcs[] = "$Id: parsers.c,v 1.81 2006/12/31 22:21:33 fabiankeil
   *
   * Revisions   :
   *    $Log: parsers.c,v $
+ *    Revision 1.87  2007/01/31 16:21:38  fabiankeil
+ *    Search for Max-Forwards headers case-insensitively,
+ *    don't generate the "501 unsupported" message for invalid
+ *    Max-Forwards values and don't increase negative ones.
+ *
+ *    Revision 1.86  2007/01/30 13:05:26  fabiankeil
+ *    - Let server_set_cookie() check the expiration date
+ *      of cookies and don't touch the ones that are already
+ *      expired. Fixes problems with low quality web applications
+ *      as described in BR 932612.
+ *
+ *    - Adjust comment in client_max_forwards to reality;
+ *      remove invalid Max-Forwards headers.
+ *
+ *    Revision 1.85  2007/01/26 15:33:46  fabiankeil
+ *    Stop filter_header() from unintentionally removing
+ *    empty header lines that were enlisted by the continue
+ *    hack.
+ *
+ *    Revision 1.84  2007/01/24 12:56:52  fabiankeil
+ *    - Repeat the request URL before logging any headers.
+ *      Makes reading the log easier in case of simultaneous requests.
+ *    - If there is more than one Content-Type header in one request,
+ *      use the first one and remove the others.
+ *    - Remove "newval" variable in server_content_type().
+ *      It's only used once.
+ *
+ *    Revision 1.83  2007/01/12 15:03:02  fabiankeil
+ *    Correct a cast, check inflateEnd() exit code
+ *    to see if we have to, replace sprintf calls
+ *    with snprintf.
+ *
+ *    Revision 1.82  2007/01/01 19:36:37  fabiankeil
+ *    Integrate a modified version of Wil Mahan's
+ *    zlib patch (PR #895531).
+ *
   *    Revision 1.81  2006/12/31 22:21:33  fabiankeil
   *    Skip empty filter files in filter_header()
   *    but don't ignore the ones that come afterwards.
@@ -816,6 +852,8 @@ jb_err add_to_iob(struct client_state *csp, char *buf, int n)
  jb_err decompress_iob(struct client_state *csp)
  {
     char  *buf;       /* new, uncompressed buffer */
+   char  *cur;       /* Current iob position (to keep the original 
+                      * iob->cur unmodified if we return early) */
     size_t bufsize;   /* allocated size of the new buffer */
     size_t skip_size; /* Number of bytes at the beginning of the iob
                          that we should NOT decompress. */
@@ -825,6 +863,8 @@ jb_err decompress_iob(struct client_state *csp)
     bufsize = csp->iob->size;
     skip_size = (size_t)(csp->iob->cur - csp->iob->buf);
  
+   cur = csp->iob->cur;
+
     if (bufsize < 10)
     {
        /*
@@ -849,16 +889,16 @@ jb_err decompress_iob(struct client_state *csp)
         * Strip off the gzip header. Please see RFC 1952 for more
         * explanation of the appropriate fields.
         */
-      if ((*csp->iob->cur++ != (char)0x1f)
-       || (*csp->iob->cur++ != (char)0x8b)
-       || (*csp->iob->cur++ != Z_DEFLATED))
+      if ((*cur++ != (char)0x1f)
+       || (*cur++ != (char)0x8b)
+       || (*cur++ != Z_DEFLATED))
        {
           log_error (LOG_LEVEL_ERROR, "Invalid gzip header when decompressing");
           return JB_ERR_COMPRESS;
        }
        else
        {
-         int flags = *csp->iob->cur++;
+         int flags = *cur++;
           /*
            * XXX: These magic numbers should be replaced
            * with macros to give a better idea what they do.
@@ -866,10 +906,10 @@ jb_err decompress_iob(struct client_state *csp)
           if (flags & 0xe0)
           {
              /* The gzip header has reserved bits set; bail out. */
-            log_error (LOG_LEVEL_ERROR, "Invalid gzip header when decompressing");
+            log_error (LOG_LEVEL_ERROR, "Invalid gzip header flags when decompressing");
              return JB_ERR_COMPRESS;
           }
-         csp->iob->cur += 6;
+         cur += 6;
  
           /* Skip extra fields if necessary. */
           if (flags & 0x04)
@@ -878,32 +918,73 @@ jb_err decompress_iob(struct client_state *csp)
               * Skip a given number of bytes, specified
               * as a 16-bit little-endian value.
               */
-            csp->iob->cur += *csp->iob->cur++ + (*csp->iob->cur++ << 8);
+            /*
+             * XXX: This code used to be:
+             * 
+             * csp->iob->cur += *csp->iob->cur++ + (*csp->iob->cur++ << 8);
+             *
+             * which I had to change into:
+             *
+             * cur += *cur++ + (*cur++ << 8);
+             *
+             * at which point gcc43 finally noticed that the value
+             * of cur is undefined (it depends on which of the
+             * summands is evaluated first).
+             *
+             * I haven't come across a site where this
+             * code is actually executed yet, but I hope
+             * it works anyway.
+             */
+            int skip_bytes;
+            skip_bytes = *cur++;
+            skip_bytes = *cur++ << 8;
+
+            assert(skip_bytes == *csp->iob->cur-2 + ((*csp->iob->cur-1) << 8));
+
+            /*
+             * The number of bytes to skip should be positive
+             * and we'd like to stay in the buffer.
+             */
+            if((skip_bytes < 0) || (skip_bytes >= (csp->iob->eod - cur)))
+            {
+               log_error (LOG_LEVEL_ERROR,
+                  "Unreasonable amount of bytes to skip (%d). Stopping decompression",
+                  skip_bytes);
+               return JB_ERR_COMPRESS;
+            }
+            log_error (LOG_LEVEL_INFO,
+               "Skipping %d bytes for gzip compression. Does this sound right?",
+               skip_bytes);
+            cur += skip_bytes;
           }
  
           /* Skip the filename if necessary. */
           if (flags & 0x08)
           {
              /* A null-terminated string follows. */
-            while (*csp->iob->cur++);
+            while (*cur++);
           }
  
           /* Skip the comment if necessary. */
           if (flags & 0x10)
           {
-            while (*csp->iob->cur++);
+            while (*cur++);
           }
  
           /* Skip the CRC if necessary. */
           if (flags & 0x02)
           {
-            csp->iob->cur += 2;
+            cur += 2;
           }
        }
     }
     else if (csp->content_type & CT_DEFLATE)
     {
-      log_error (LOG_LEVEL_INFO, "Decompressing deflated iob: %d", *csp->iob->cur);
+      /*
+       * XXX: The debug level should be lowered
+       * before the next stable release.
+       */
+      log_error (LOG_LEVEL_INFO, "Decompressing deflated iob: %d", *cur);
        /*
         * In theory (that is, according to RFC 1950), deflate-compressed
         * data should begin with a two-byte zlib header and have an
@@ -929,8 +1010,8 @@ jb_err decompress_iob(struct client_state *csp)
     }
  
     /* Set up the fields required by zlib. */
-   zstr.next_in  = (Bytef *)csp->iob->cur;
-   zstr.avail_in = (unsigned long)(csp->iob->eod - csp->iob->cur);
+   zstr.next_in  = (Bytef *)cur;
+   zstr.avail_in = (unsigned int)(csp->iob->eod - cur);
     zstr.zalloc   = Z_NULL;
     zstr.zfree    = Z_NULL;
     zstr.opaque   = Z_NULL;
@@ -1031,7 +1112,22 @@ jb_err decompress_iob(struct client_state *csp)
        }
     }
  
-   inflateEnd(&zstr);
+   if (Z_STREAM_ERROR == inflateEnd(&zstr))
+   {
+      log_error(LOG_LEVEL_ERROR,
+         "Inconsistent stream state after decompression: %s", zstr.msg);
+      /*
+       * XXX: Intentionally no return.
+       *
+       * According to zlib.h, Z_STREAM_ERROR is returned
+       * "if the stream state was inconsistent".
+       *
+       * I assume in this case inflate()'s status
+       * would also be something different than Z_STREAM_END
+       * so this check should be redundant, but let's see.
+       */
+   }
+
     if (status != Z_STREAM_END)
     {
        /* We failed to decompress the stream. */
@@ -1229,6 +1325,7 @@ char *sed(const struct parsers pats[],
  
     if (first_run) /* Parse and print */
     {
+      log_error(LOG_LEVEL_HEADER, "scanning headers for: %s", csp->http->url);
        for (v = pats; (err == JB_ERR_OK) && (v->str != NULL) ; v++)
        {
           for (p = csp->headers->first; (err == JB_ERR_OK) && (p != NULL) ; p = p->next)
@@ -1470,7 +1567,12 @@ jb_err filter_header(struct client_state *csp, char **header)
        }
     }
  
-   if ( 0 == size )
+   /*
+    * Additionally checking for hits is important because if
+    * the continue hack is triggered, server headers can
+    * arrive empty to separate multiple heads from each other.
+    */
+   if ((0 == size) && hits)
     {
        log_error(LOG_LEVEL_HEADER, "Removing empty header %s", *header);
        freez(*header);
@@ -1540,12 +1642,14 @@ jb_err crunch_server_header(struct client_state *csp, char **header)
  
     return JB_ERR_OK;
  }
+
+
  /*********************************************************************
   *
   * Function    :  server_content_type
   *
   * Description :  Set the content-type for filterable types (text/.*,
- *                javascript and image/gif) unless filtering has been
+ *                .*xml.*, javascript and image/gif) unless filtering has been
   *                forbidden (CT_TABOO) while parsing earlier headers.
   *                NOTE: Since text/plain is commonly used by web servers
   *                      for files whose correct type is unknown, we don't
@@ -1564,11 +1668,22 @@ jb_err crunch_server_header(struct client_state *csp, char **header)
   *********************************************************************/
  jb_err server_content_type(struct client_state *csp, char **header)
  {
-   const char *newval;
-   
-   newval = csp->action->string[ACTION_STRING_CONTENT_TYPE]; 
+   /* Remove header if it isn't the first Content-Type header */
+   if(csp->content_type && (csp->content_type != CT_TABOO))
+   {
+     /*
+      * Another, slightly slower, way to see if
+      * we already parsed another Content-Type header.
+      */
+      assert(NULL != get_header_value(csp->headers, "Content-Type:"));
+
+      log_error(LOG_LEVEL_ERROR,
+         "Multiple Content-Type headers. Removing and ignoring: \'%s\'",
+         *header);
+      freez(*header);
  
-   assert(!csp->content_type || (csp->content_type == CT_TABOO));
+      return JB_ERR_OK;
+   }
  
     if (!(csp->content_type & CT_TABOO))
     {
@@ -1622,12 +1737,11 @@ jb_err server_content_type(struct client_state *csp, char **header)
        { 
           freez(*header);
           *header = strdup("Content-Type: ");
-         string_append(header, newval);
+         string_append(header, csp->action->string[ACTION_STRING_CONTENT_TYPE]);
  
           if (header == NULL)
           { 
-            log_error(LOG_LEVEL_HEADER,
-               "Insufficient memory. Content-Type crunched without replacement!");
+            log_error(LOG_LEVEL_HEADER, "Insufficient memory to replace Content-Type!");
              return JB_ERR_MEMORY;
           }
           log_error(LOG_LEVEL_HEADER, "Modified: %s!", *header);
@@ -1733,8 +1847,8 @@ jb_err server_transfer_coding(struct client_state *csp, char **header)
  jb_err server_content_encoding(struct client_state *csp, char **header)
  {
  #ifdef FEATURE_ZLIB
-   /* XXX: Why would we modify the content if it was taboo? */
-   if ((csp->flags & CSP_FLAG_MODIFIED) && !(csp->content_type & CT_TABOO))
+   if ((csp->flags & CSP_FLAG_MODIFIED)
+    && (csp->content_type & (CT_GZIP | CT_DEFLATE)))
     {
        /*
         * We successfully decompressed the content,
@@ -1800,6 +1914,7 @@ jb_err server_content_encoding(struct client_state *csp, char **header)
   *********************************************************************/
  jb_err server_content_length(struct client_state *csp, char **header)
  {
+   const size_t max_header_length = 80;
     if (csp->content_length != 0) /* Content length could have been modified */
     {
        /*
@@ -1807,15 +1922,16 @@ jb_err server_content_length(struct client_state *csp, char **header)
         * is different than the original value?
         */
        freez(*header);
-      *header = (char *) zalloc(100);
+      *header = (char *) zalloc(max_header_length);
        if (*header == NULL)
        {
           return JB_ERR_MEMORY;
        }
  
-      sprintf(*header, "Content-Length: %d", (int) csp->content_length);
-
-      log_error(LOG_LEVEL_HEADER, "Adjust Content-Length to %d", (int) csp->content_length);
+      snprintf(*header, max_header_length, "Content-Length: %d",
+         (int)csp->content_length);
+      log_error(LOG_LEVEL_HEADER, "Adjusted Content-Length to %d",
+         (int)csp->content_length);
     }
  
     return JB_ERR_OK;
@@ -2308,11 +2424,13 @@ jb_err client_accept_language(struct client_state *csp, char **header)
  
        if (*header == NULL)
        {
-         log_error(LOG_LEVEL_ERROR, " Insufficent memory. Accept-Language header crunched without replacement.");  
+         log_error(LOG_LEVEL_ERROR,
+            "Insufficent memory. Accept-Language header crunched without replacement.");  
        }
        else
        {
-         log_error(LOG_LEVEL_HEADER, "Accept-Language header crunched and replaced with: %s", *header);
+         log_error(LOG_LEVEL_HEADER,
+            "Accept-Language header crunched and replaced with: %s", *header);
        }
     }
     return (*header == NULL) ? JB_ERR_MEMORY : JB_ERR_OK;
@@ -2581,23 +2699,42 @@ jb_err client_x_forwarded(struct client_state *csp, char **header)
   *********************************************************************/
  jb_err client_max_forwards(struct client_state *csp, char **header)
  {
-   unsigned int max_forwards;
+   int max_forwards;
  
-   if ((0 == strcmpic(csp->http->gpc, "trace"))
-      || (0 == strcmpic(csp->http->gpc, "options")))
+   if ((0 == strcmpic(csp->http->gpc, "trace")) ||
+       (0 == strcmpic(csp->http->gpc, "options")))
     {
-      if (1 == sscanf(*header, "Max-Forwards: %u", &max_forwards))
+      assert(*(*header+12) == ':');
+      if (1 == sscanf(*header+12, ": %u", &max_forwards))
        {
-         if (max_forwards-- >= 1)
+         if (max_forwards > 0)
           {
-            sprintf(*header, "Max-Forwards: %u", max_forwards);
-            log_error(LOG_LEVEL_HEADER, "Max forwards of %s request now %d", csp->http->gpc, max_forwards);
+            snprintf(*header, strlen(*header)+1, "Max-Forwards: %u", --max_forwards);
+            log_error(LOG_LEVEL_HEADER, "Max-Forwards value for %s request reduced to %u.",
+               csp->http->gpc, max_forwards);
+         }
+         else if (max_forwards < 0)
+         {
+            log_error(LOG_LEVEL_ERROR, "Crunching invalid header: %s", *header);
+            freez(*header);
           }
           else
           {
-            log_error(LOG_LEVEL_ERROR, "Non-intercepted %s request with Max-Forwards zero!", csp->http->gpc);
+            /*
+             * Not supposed to be reached. direct_response() which
+             * was already called earlier in chat() should have
+             * intercepted the request.
+             */
+            log_error(LOG_LEVEL_ERROR,
+               "Non-intercepted %s request with Max-Forwards zero!", csp->http->gpc);
+            assert(max_forwards != 0);
           }
        }
+      else
+      {
+         log_error(LOG_LEVEL_ERROR, "Crunching invalid header: %s", *header);
+         freez(*header);
+      }
     }
  
     return JB_ERR_OK;
@@ -2855,7 +2992,8 @@ jb_err client_x_filter(struct client_state *csp, char **header)
        {
           if (csp->action->flags & ACTION_FORCE_TEXT_MODE)
           {
-            log_error(LOG_LEVEL_HEADER, "force-text-mode overruled the client's request to fetch without filtering!");
+            log_error(LOG_LEVEL_HEADER,
+               "force-text-mode overruled the client's request to fetch without filtering!");
           }
           else
           {  
@@ -3160,8 +3298,13 @@ jb_err server_http(struct client_state *csp, char **header)
   * Function    :  server_set_cookie
   *
   * Description :  Handle the server "cookie" header properly.
- *                Log cookie to the jar file.  Then "crunch" it,
- *                or accept it.  Called from `sed'.
+ *                Log cookie to the jar file.  Then "crunch",
+ *                accept or rewrite it to a session cookie.
+ *                Called from `sed'.
+ *
+ *                TODO: Allow the user to specify a new expiration
+ *                time to cause the cookie to expire even before the
+ *                browser is closed.
   *
   * Parameters  :
   *          1  :  csp = Current client state (buffers, headers, etc...)
@@ -3176,6 +3319,12 @@ jb_err server_http(struct client_state *csp, char **header)
   *********************************************************************/
  jb_err server_set_cookie(struct client_state *csp, char **header)
  {
+   time_t now;
+   time_t cookie_time; 
+   struct tm tm_now; 
+   struct tm tm_cookie;
+   time(&now);
+
  #ifdef FEATURE_COOKIE_JAR
     if (csp->config->jar)
     {
@@ -3186,9 +3335,7 @@ jb_err server_set_cookie(struct client_state *csp, char **header)
         * the %z field in strftime()
         */
        char tempbuf[ BUFFER_SIZE ];
-      time_t now; 
-      struct tm tm_now; 
-      time (&now); 
+ 
  #ifdef HAVE_LOCALTIME_R
        tm_now = *localtime_r(&now, &tm_now);
  #elif FEATURE_PTHREAD
@@ -3249,22 +3396,122 @@ jb_err server_set_cookie(struct client_state *csp, char **header)
              next_tag = cur_tag + strlen(cur_tag);
           }
  
-         /* Is this the "Expires" tag? */
+         /*
+          * Check the expiration date to see
+          * if the cookie is still valid, if yes,
+          * rewrite it to a session cookie.
+          */
           if (strncmpic(cur_tag, "expires=", 8) == 0)
           {
-            /* Delete the tag by copying the rest of the string over it.
-             * (Note that we cannot just use "strcpy(cur_tag, next_tag)",
-             * since the behaviour of strcpy is undefined for overlapping
-             * strings.)
+            char *match;
+            /*
+             * Try the valid time formats we know about.
+             *
+             * XXX: Maybe the log messages should be removed
+             * for the next stable release. They just exist to
+             * see which time format gets the most hits and
+             * should be checked for first.
               */
-            memmove(cur_tag, next_tag, strlen(next_tag) + 1);
+            if (NULL != (match = strptime(cur_tag, "expires=%a, %e-%b-%y %H:%M:%S ", &tm_cookie)))
+            {
+               log_error(LOG_LEVEL_HEADER,
+                  "cookie \'%s\' send by %s appears to be using time format 1.",
+                  csp->http->url, *header);
+            }
+            else if (NULL != (match = strptime(cur_tag, "expires=%A, %e-%b-%Y %H:%M:%S ", &tm_cookie)))
+            {
+               log_error(LOG_LEVEL_HEADER,
+                  "cookie \'%s\' send by %s appears to be using time format 2.",
+                  csp->http->url, *header);
  
-            /* That changed the header, need to issue a log message */
-            changed = 1;
+            }
+            else if (NULL != (match = strptime(cur_tag, "expires=%a, %e-%b-%Y %H:%M:%S ", &tm_cookie)))
+            {
+               log_error(LOG_LEVEL_HEADER,
+                  "cookie \'%s\' send by %s appears to be using time format 3.",
+                  csp->http->url, *header);
+            }
+
+            /* Did any of them match? */
+            if (NULL == match)
+            {
+               /*
+                * Nope, treat it as if it was still valid.
+                *
+                * XXX: Should we remove the whole cookie instead?
+                */
+               log_error(LOG_LEVEL_ERROR,
+                  "Can't parse %s. Unsupported time format?", cur_tag);
+               memmove(cur_tag, next_tag, strlen(next_tag) + 1);
+               changed = 1;
+            }
+            else
+            {
+               /*
+                * Yes. Check if the cookie is still valid.
+                *
+                * If the cookie is already expired it's probably
+                * a delete cookie and even if it isn't, the browser
+                * will discard it anyway.
+                */
+
+               /*
+                * XXX: timegm() isn't available on some AmigaOS
+                * versions and our replacement doesn't work.
+                *
+                * Our options are to either:
+                *
+                * - disable session-cookies-only completely if timegm
+                *   is missing,
+                *
+                * - to simply remove all expired tags, like it has
+                *   been done until Privoxy 3.0.6 and to live with
+                *    the consequence that it can cause login/logout
+                *   problems on servers that don't validate their
+                *   input properly, or
+                *
+                * - to replace it with mktime in which
+                *   case there is a slight chance of valid cookies
+                *   passing as already expired.
+                *
+                *   This is the way it's currently done and it's not
+                *   as bad as it sounds. If the missing GMT offset is
+                *   enough to change the result of the expiration check
+                *   the cookie will be only valid for a few hours
+                *   anyway, which in many cases will be shorter
+                *   than a browser session.
+                */
+               cookie_time = timegm(&tm_cookie);
+               if (cookie_time - now < 0)
+               {
+                  log_error(LOG_LEVEL_HEADER,
+                     "Cookie \'%s\' is already expired and can pass unmodified.", *header);
+                  /* Just in case some clown sets more than one expiration date */
+                  cur_tag = next_tag;
+               }
+               else
+               {
+                  log_error(LOG_LEVEL_HEADER,
+                     "Cookie \'%s\' is still valid and has to be rewritten.", *header);
+
+                  /*
+                   * Delete the tag by copying the rest of the string over it.
+                   * (Note that we cannot just use "strcpy(cur_tag, next_tag)",
+                   * since the behaviour of strcpy is undefined for overlapping
+                   * strings.)
+                   */
+                  memmove(cur_tag, next_tag, strlen(next_tag) + 1);
+
+                  /* That changed the header, need to issue a log message */
+                  changed = 1;
+
+                  /*
+                   * Note that the next tag has now been moved to *cur_tag,
+                   * so we do not need to update the cur_tag pointer.
+                   */
+               }
+            }
  
-            /* Note that the next tag has now been moved to *cur_tag,
-             * so we do not need to update the cur_tag pointer.
-             */
           }
           else
           {
@@ -3275,7 +3522,9 @@ jb_err server_set_cookie(struct client_state *csp, char **header)
  
        if (changed)
        {
-         log_error(LOG_LEVEL_HEADER, "Changed cookie to a temporary one.");
+         assert(NULL != *header);
+         log_error(LOG_LEVEL_HEADER, "Cookie rewritten to a temporary one: %s",
+            *header);
        }
     }