X-Git-Url: http://www.privoxy.org/gitweb/?p=privoxy.git;a=blobdiff_plain;f=parsers.c;h=58569863fa8d887d7f0627da96367f742461c08b;hp=33d832cdc7aadde37695758ec1f9723deac6204a;hb=7771ace7e31aca3163179f61afb58ceea217945a;hpb=cd9da7427a00d65369695af0db5530edac128a18

diff --git a/parsers.c b/parsers.c
index 33d832cd..58569863 100644
--- a/parsers.c
+++ b/parsers.c
@@ -1,4 +1,4 @@
-const char parsers_rcs[] = "$Id: parsers.c,v 1.76 2006/12/06 19:52:25 fabiankeil Exp $";
+const char parsers_rcs[] = "$Id: parsers.c,v 1.89 2007/02/07 16:52:11 fabiankeil Exp $";
 /*********************************************************************
  *
  * File        :  $Source: /cvsroot/ijbswa/current/parsers.c,v $
@@ -18,7 +18,7 @@ const char parsers_rcs[] = "$Id: parsers.c,v 1.76 2006/12/06 19:52:25 fabiankeil
  *                   `client_if_none_match', `get_destination_from_headers',
  *                   `parse_header_time' and `server_set_cookie'.
  *
- * Copyright   :  Written by and Copyright (C) 2001-2006 the SourceForge
+ * Copyright   :  Written by and Copyright (C) 2001-2007 the SourceForge
  *                Privoxy team. http://www.privoxy.org/
  *
  *                Based on the Internet Junkbuster originally written
@@ -45,6 +45,76 @@ const char parsers_rcs[] = "$Id: parsers.c,v 1.76 2006/12/06 19:52:25 fabiankeil
  *
  * Revisions   :
  *    $Log: parsers.c,v $
+ *    Revision 1.89  2007/02/07 16:52:11  fabiankeil
+ *    Fix log messages regarding the cookie time format
+ *    (cookie and request URL were mixed up).
+ *
+ *    Revision 1.88  2007/02/07 11:27:12  fabiankeil
+ *    - Let decompress_iob()
+ *      - not corrupt the content if decompression fails
+ *        early. (the first byte(s) were lost).
+ *      - use pointer arithmetics with defined outcome for
+ *        a change.
+ *    - Use a different kludge to remember a failed decompression.
+ *
+ *    Revision 1.87  2007/01/31 16:21:38  fabiankeil
+ *    Search for Max-Forwards headers case-insensitive,
+ *    don't generate the "501 unsupported" message for invalid
+ *    Max-Forwards values and don't increase negative ones.
+ *
+ *    Revision 1.86  2007/01/30 13:05:26  fabiankeil
+ *    - Let server_set_cookie() check the expiration date
+ *      of cookies and don't touch the ones that are already
+ *      expired. Fixes problems with low quality web applications
+ *      as described in BR 932612.
+ *
+ *    - Adjust comment in client_max_forwards to reality;
+ *      remove invalid Max-Forwards headers.
+ *
+ *    Revision 1.85  2007/01/26 15:33:46  fabiankeil
+ *    Stop filter_header() from unintentionally removing
+ *    empty header lines that were enlisted by the continue
+ *    hack.
+ *
+ *    Revision 1.84  2007/01/24 12:56:52  fabiankeil
+ *    - Repeat the request URL before logging any headers.
+ *      Makes reading the log easier in case of simultaneous requests.
+ *    - If there are more than one Content-Type headers in one request,
+ *      use the first one and remove the others.
+ *    - Remove "newval" variable in server_content_type().
+ *      It's only used once.
+ *
+ *    Revision 1.83  2007/01/12 15:03:02  fabiankeil
+ *    Correct a cast, check inflateEnd() exit code
+ *    to see if we have to, replace sprintf calls
+ *    with snprintf.
+ *
+ *    Revision 1.82  2007/01/01 19:36:37  fabiankeil
+ *    Integrate a modified version of Wil Mahan's
+ *    zlib patch (PR #895531).
+ *
+ *    Revision 1.81  2006/12/31 22:21:33  fabiankeil
+ *    Skip empty filter files in filter_header()
+ *    but don't ignore the ones that come afterwards.
+ *    Fixes BR 1619208, this time for real.
+ *
+ *    Revision 1.80  2006/12/29 19:08:22  fabiankeil
+ *    Reverted parts of my last commit
+ *    to keep error handling working.
+ *
+ *    Revision 1.79  2006/12/29 18:04:40  fabiankeil
+ *    Fixed gcc43 conversion warnings.
+ *
+ *    Revision 1.78  2006/12/26 17:19:20  fabiankeil
+ *    Bringing back the "useless" localtime() call
+ *    I removed in revision 1.67. On some platforms
+ *    it's necessary to prevent time zone offsets.
+ *
+ *    Revision 1.77  2006/12/07 18:44:26  fabiankeil
+ *    Rebuild request URL in get_destination_from_headers()
+ *    to make sure redirect{pcrs command} works as expected
+ *    for intercepted requests.
+ *
  *    Revision 1.76  2006/12/06 19:52:25  fabiankeil
  *    Added get_destination_from_headers().
  *
@@ -551,6 +621,10 @@ const char parsers_rcs[] = "$Id: parsers.c,v 1.76 2006/12/06 19:52:25 fabiankeil
 #include <string.h>
 #include <time.h>
 
+#ifdef FEATURE_ZLIB
+#include <zlib.h>
+#endif
+
 #if !defined(_WIN32) && !defined(__OS2__)
 #include <unistd.h>
 #endif
@@ -618,7 +692,6 @@ const struct parsers server_patterns[] = {
    { "set-cookie:",              11, server_set_cookie },
    { "connection:",              11, crumble },
    { "Content-Type:",            13, server_content_type },
-   { "Content-Length:",          15, server_content_length },
    { "Content-MD5:",             12, server_content_md5 },
    { "Content-Encoding:",        17, server_content_encoding },
    { "Transfer-Encoding:",       18, server_transfer_coding },
@@ -633,6 +706,9 @@ const struct parsers server_patterns[] = {
 const struct parsers server_patterns_light[] = {
    { "Content-Length:",          15, server_content_length },
    { "Transfer-Encoding:",       18, server_transfer_coding },
+#ifdef FEATURE_ZLIB
+   { "Content-Encoding:",        17, server_content_encoding },
+#endif /* def FEATURE_ZLIB */
    { NULL, 0, NULL }
 };
 
@@ -714,9 +790,9 @@ jb_err add_to_iob(struct client_state *csp, char *buf, int n)
 
    if (n <= 0) return JB_ERR_OK;
 
-   used   = iob->eod - iob->buf;
-   offset = iob->cur - iob->buf;
-   need   = used + n + 1;
+   used   = (size_t)(iob->eod - iob->buf);
+   offset = (size_t)(iob->cur - iob->buf);
+   need   = used + (size_t)n + 1;
 
    /*
     * If the buffer can't hold the new data, extend it first.
@@ -766,6 +842,356 @@ jb_err add_to_iob(struct client_state *csp, char *buf, int n)
 }
 
 
+#ifdef FEATURE_ZLIB
+/*********************************************************************
+ *
+ * Function    :  decompress_iob
+ *
+ * Description :  Decompress buffered page, expanding the
+ *                buffer as necessary.  csp->iob->cur
+ *                should point to the beginning of the
+ *                compressed data block.
+ *
+ * Parameters  :
+ *          1  :  csp = Current client state (buffers, headers, etc...)
+ *
+ * Returns     :  JB_ERR_OK on success,
+ *                JB_ERR_MEMORY if out-of-memory limit reached, and
+ *                JB_ERR_COMPRESS if error decompressing buffer.
+ *
+ *********************************************************************/
+jb_err decompress_iob(struct client_state *csp)
+{
+   char  *buf;       /* new, uncompressed buffer */
+   char  *cur;       /* local read position; iob->cur stays untouched on failure */
+   char  *eod;       /* local end of input; iob->eod stays untouched on failure */
+   size_t bufsize;   /* allocated size of the new buffer */
+   size_t skip_size; /* Number of bytes at the beginning of the iob
+                        that we should NOT decompress. */
+   int status;       /* return status of the inflate() call */
+   z_stream zstr;    /* used by calls to zlib */
+
+   bufsize = csp->iob->size;
+   skip_size = (size_t)(csp->iob->cur - csp->iob->buf);
+   cur = csp->iob->cur;
+   eod = csp->iob->eod;
+
+   if (bufsize < 10)
+   {
+      /*
+       * This is to protect the parsing of gzipped data,
+       * but it should(?) be valid for deflated data also.
+       */
+      log_error (LOG_LEVEL_ERROR, "Buffer too small decompressing iob");
+      return JB_ERR_COMPRESS;
+   }
+
+   if (csp->content_type & CT_GZIP)
+   {
+      /*
+       * Our task is slightly complicated by the facts that data
+       * compressed by gzip does not include a zlib header, and
+       * that there is no easily accessible interface in zlib to
+       * handle a gzip header. We strip off the gzip header by
+       * hand, and later inform zlib not to expect a header.
+       */
+
+      /*
+       * Strip off the gzip header. Please see RFC 1952 for more
+       * explanation of the appropriate fields. The fixed part of
+       * the header is ten bytes long, so make sure they are there.
+       */
+      if (((eod - cur) < 10)
+       || (*cur++ != (char)0x1f)
+       || (*cur++ != (char)0x8b)
+       || (*cur++ != Z_DEFLATED))
+      {
+         log_error (LOG_LEVEL_ERROR, "Invalid gzip header when decompressing");
+         return JB_ERR_COMPRESS;
+      }
+      else
+      {
+         int flags = *cur++;
+         /*
+          * XXX: These magic numbers should be replaced
+          * with macros to give a better idea what they do.
+          */
+         if (flags & 0xe0)
+         {
+            /* The gzip header has reserved bits set; bail out. */
+            log_error (LOG_LEVEL_ERROR, "Invalid gzip header flags when decompressing");
+            return JB_ERR_COMPRESS;
+         }
+         cur += 6;
+
+         /* Skip extra fields if necessary. */
+         if (flags & 0x04)
+         {
+            /*
+             * Skip a given number of bytes, specified
+             * as a 16-bit little-endian value.
+             *
+             * The length bytes are read in two statements as the
+             * evaluation order of two cur++ in one expression is
+             * undefined, and as unsigned char to avoid sign extension.
+             */
+            int skip_bytes;
+
+            if ((eod - cur) < 2)
+            {
+               log_error (LOG_LEVEL_ERROR, "Invalid gzip header when decompressing");
+               return JB_ERR_COMPRESS;
+            }
+            skip_bytes  = (unsigned char)*cur++;
+            skip_bytes += (unsigned char)*cur++ << 8;
+
+            /*
+             * The number of bytes to skip should be positive
+             * and we'd like to stay in the buffer.
+             */
+            if((skip_bytes < 0) || (skip_bytes >= (eod - cur)))
+            {
+               log_error (LOG_LEVEL_ERROR,
+                  "Unreasonable amount of bytes to skip (%d). Stopping decompression",
+                  skip_bytes);
+               return JB_ERR_COMPRESS;
+            }
+            log_error (LOG_LEVEL_INFO,
+               "Skipping %d bytes for gzip compression. Does this sound right?",
+               skip_bytes);
+            cur += skip_bytes;
+         }
+
+         /* Skip the filename if necessary. */
+         if (flags & 0x08)
+         {
+            /* A null-terminated string follows; don't run past the data. */
+            while ((cur < eod) && *cur++);
+         }
+
+         /* Skip the comment if necessary. */
+         if (flags & 0x10)
+         {
+            while ((cur < eod) && *cur++);
+         }
+
+         /* The CRC, if any, must be followed by compressed data. */
+         if ((eod - cur) <= ((flags & 0x02) ? 2 : 0))
+         {
+            log_error (LOG_LEVEL_ERROR, "Invalid gzip header when decompressing");
+            return JB_ERR_COMPRESS;
+         }
+
+         /* Skip the CRC if necessary. */
+         if (flags & 0x02)
+         {
+            cur += 2;
+         }
+      }
+   }
+   else if (csp->content_type & CT_DEFLATE)
+   {
+      /*
+       * XXX: The debug level should be lowered
+       * before the next stable release.
+       */
+      log_error (LOG_LEVEL_INFO, "Decompressing deflated iob: %d", *cur);
+      /*
+       * In theory (that is, according to RFC 1950), deflate-compressed
+       * data should begin with a two-byte zlib header and have an
+       * adler32 checksum at the end. It seems that in practice only
+       * the raw compressed data is sent. Note that this means that
+       * we are not RFC 1950-compliant here, but the advantage is that
+       * this actually works. :)
+       *
+       * We add a dummy null byte to tell zlib where the data ends,
+       * and later inform it not to expect a header.
+       *
+       * Fortunately, add_to_iob() has thoughtfully null-terminated
+       * the buffer; we can just increment our copy of the end
+       * pointer to include the dummy byte without touching the iob.
+       */
+      eod++;
+   }
+   else
+   {
+      log_error (LOG_LEVEL_ERROR,
+         "Unable to determine compression format for decompression");
+      return JB_ERR_COMPRESS;
+   }
+
+   /* Set up the fields required by zlib. */
+   zstr.next_in  = (Bytef *)cur;
+   zstr.avail_in = (unsigned int)(eod - cur);
+   zstr.zalloc   = Z_NULL;
+   zstr.zfree    = Z_NULL;
+   zstr.opaque   = Z_NULL;
+
+   /*
+    * Passing -MAX_WBITS to inflateInit2 tells the library
+    * that there is no zlib header.
+    */
+   if (inflateInit2 (&zstr, -MAX_WBITS) != Z_OK)
+   {
+      log_error (LOG_LEVEL_ERROR, "Error initializing decompression");
+      return JB_ERR_COMPRESS;
+   }
+
+   /*
+    * Allocate new storage for the inflated data. The existing iob
+    * isn't modified yet, so we can recover gracefully from errors.
+    */
+   buf = zalloc (bufsize);
+   if (NULL == buf)
+   {
+      log_error (LOG_LEVEL_ERROR, "Out of memory decompressing iob");
+      inflateEnd(&zstr);
+      return JB_ERR_MEMORY;
+   }
+
+   assert(bufsize >= skip_size);
+   memcpy(buf, csp->iob->buf, skip_size);
+   zstr.avail_out = (unsigned int)(bufsize - skip_size);
+   zstr.next_out  = (Bytef *)buf + skip_size;
+
+   /* Try to decompress the whole stream in one shot. */
+   while (Z_BUF_ERROR == (status = inflate(&zstr, Z_FINISH)))
+   {
+      /* We need to allocate more memory for the output buffer. */
+
+      char *tmpbuf;                /* used for realloc'ing the buffer */
+      size_t oldbufsize = bufsize; /* keep track of the old bufsize */
+
+      /*
+       * If zlib wants more data then there's a problem, because
+       * the complete compressed file should have been buffered.
+       */
+      if (0 == zstr.avail_in)
+      {
+         log_error(LOG_LEVEL_ERROR, "Unexpected end of compressed iob");
+         inflateEnd(&zstr);
+         freez(buf);
+         return JB_ERR_COMPRESS;
+      }
+
+      /* Give up once the limit is reached (or was exceeded to begin with). */
+      if (bufsize >= csp->config->buffer_limit)
+      {
+         log_error(LOG_LEVEL_ERROR, "Buffer limit reached while decompressing iob");
+         inflateEnd(&zstr);
+         freez(buf);
+         return JB_ERR_MEMORY;
+      }
+
+      /* Try doubling the buffer size each time, but don't exceed the limit. */
+      bufsize *= 2;
+      if (bufsize > csp->config->buffer_limit)
+      {
+         bufsize = csp->config->buffer_limit;
+      }
+
+      /* Try to allocate the new buffer. */
+      tmpbuf = realloc(buf, bufsize);
+      if (NULL == tmpbuf)
+      {
+         log_error(LOG_LEVEL_ERROR, "Out of memory decompressing iob");
+         inflateEnd(&zstr);
+         freez(buf);
+         return JB_ERR_MEMORY;
+      }
+      else
+      {
+         char *oldnext_out = (char *)zstr.next_out;
+
+         /*
+          * Update the fields for inflate() to use the new
+          * buffer, which may be in a location different from
+          * the old one.
+          */
+         zstr.avail_out += bufsize - oldbufsize;
+         zstr.next_out   = (Bytef *)tmpbuf + bufsize - zstr.avail_out;
+
+         /*
+          * Compare with an uglier method of calculating these values
+          * that doesn't require the extra oldbufsize variable.
+          */
+         assert(zstr.avail_out == tmpbuf + bufsize - (char *)zstr.next_out);
+         assert((char *)zstr.next_out == tmpbuf + ((char *)oldnext_out - buf));
+         assert(zstr.avail_out > 0);
+
+         buf = tmpbuf;
+      }
+   }
+
+   if (Z_STREAM_ERROR == inflateEnd(&zstr))
+   {
+      log_error(LOG_LEVEL_ERROR,
+         "Inconsistent stream state after decompression: %s", zstr.msg);
+      /*
+       * XXX: Intentionally no return.
+       *
+       * According to zlib.h, Z_STREAM_ERROR is returned
+       * "if the stream state was inconsistent".
+       *
+       * I assume in this case inflate()'s status
+       * would also be something different than Z_STREAM_END
+       * so this check should be redundant, but let's see.
+       */
+   }
+
+   if (status != Z_STREAM_END)
+   {
+      /* We failed to decompress the stream; drop the partial output. */
+      log_error(LOG_LEVEL_ERROR,
+         "Error in decompressing to the buffer (iob): %s", zstr.msg);
+      freez(buf);
+      return JB_ERR_COMPRESS;
+   }
+
+   /*
+    * Finally, we can actually update the iob, since the
+    * decompression was successful. First, free the old buffer.
+    */
+   freez(csp->iob->buf);
+
+   /* Now, update the iob to use the new buffer. */
+   csp->iob->buf  = buf;
+   csp->iob->cur  = csp->iob->buf + skip_size;
+   csp->iob->eod  = (char *)zstr.next_out;
+   csp->iob->size = bufsize;
+
+   /* Make sure the new iob obeys some minimal consistency conditions. */
+   if ((csp->iob->buf <  csp->iob->cur)
+    && (csp->iob->cur <= csp->iob->eod)
+    && (csp->iob->eod <= csp->iob->buf + csp->iob->size))
+   {
+      char preview[101];
+      size_t length = (size_t)(csp->iob->eod - csp->iob->cur);
+
+      /* Log at most 100 bytes and never read past the end of the data. */
+      length = (length < sizeof(preview)) ? length : sizeof(preview) - 1;
+      memcpy(preview, csp->iob->cur, length);
+      preview[length] = '\0';
+      /* XXX: The debug level should be lowered before the next stable release. */
+      log_error(LOG_LEVEL_INFO, "Sucessfully decompressed: %s", preview);
+      return JB_ERR_OK;
+   }
+   else
+   {
+      /* It seems that zlib did something weird. */
+      /* The values logged are offsets relative to csp->iob->buf. */
+      log_error(LOG_LEVEL_ERROR,
+         "Unexpected error decompressing the buffer (iob): %d==%d, %d>%d, %d<%d",
+         (int)(csp->iob->cur - csp->iob->buf), (int)skip_size,
+         (int)(csp->iob->eod - csp->iob->buf), 0,
+         (int)(csp->iob->eod - csp->iob->buf), (int)csp->iob->size);
+      return JB_ERR_COMPRESS;
+   }
+
+}
+#endif /* defined(FEATURE_ZLIB) */
+
+
 /*********************************************************************
  *
  * Function    :  get_header
@@ -910,6 +1336,7 @@ char *sed(const struct parsers pats[],
 
    if (first_run) /* Parse and print */
    {
+      log_error(LOG_LEVEL_HEADER, "scanning headers for: %s", csp->http->url);
       for (v = pats; (err == JB_ERR_OK) && (v->str != NULL) ; v++)
       {
          for (p = csp->headers->first; (err == JB_ERR_OK) && (p != NULL) ; p = p->next)
@@ -940,7 +1367,7 @@ char *sed(const struct parsers pats[],
        */
       if (strncmpic(csp->http->cmd, "HEAD", 4))
       {
-         /*XXX: Code duplication*/
+         /*XXX: Code duplication */
          for (v = pats; (err == JB_ERR_OK) && (v->str != NULL) ; v++)
          {
             for (p = csp->headers->first; (err == JB_ERR_OK) && (p != NULL) ; p = p->next)
@@ -1074,14 +1501,25 @@ jb_err filter_header(struct client_state *csp, char **header)
    if (0 == found_filters)
    {
       log_error(LOG_LEVEL_ERROR, "Unable to get current state of regexp filtering.");
-         return(JB_ERR_OK);
+      return(JB_ERR_OK);
    }
 
    for (i = 0; i < MAX_AF_FILES; i++)
    {
       fl = csp->rlist[i];
       if ((NULL == fl) || (NULL == fl->f))
-         break;
+      {
+         /*
+          * Either there are no filter files
+          * left, or this filter file just
+          * contains no valid filters.
+          *
+          * Continue to be sure we don't miss
+          * valid filter files that are chained
+          * after empty or invalid ones.
+          */
+         continue;
+      }
       /*
        * For all applying +filter actions, look if a filter by that
        * name exists and if yes, execute its pcrs_joblist on the
@@ -1140,7 +1578,12 @@ jb_err filter_header(struct client_state *csp, char **header)
       }
    }
 
-   if ( 0 == size )
+   /*
+    * Additionally checking for hits is important because if
+    * the continue hack is triggered, server headers can
+    * arrive empty to separate multiple heads from each other.
+    */
+   if ((0 == size) && hits)
    {
       log_error(LOG_LEVEL_HEADER, "Removing empty header %s", *header);
       freez(*header);
@@ -1210,12 +1653,14 @@ jb_err crunch_server_header(struct client_state *csp, char **header)
 
    return JB_ERR_OK;
 }
+
+
 /*********************************************************************
  *
  * Function    :  server_content_type
  *
  * Description :  Set the content-type for filterable types (text/.*,
- *                javascript and image/gif) unless filtering has been
+ *                .*xml.*, javascript and image/gif) unless filtering has been
  *                forbidden (CT_TABOO) while parsing earlier headers.
  *                NOTE: Since text/plain is commonly used by web servers
  *                      for files whose correct type is unknown, we don't
@@ -1234,38 +1679,59 @@ jb_err crunch_server_header(struct client_state *csp, char **header)
  *********************************************************************/
 jb_err server_content_type(struct client_state *csp, char **header)
 {
-   const char *newval;
-   
-   newval = csp->action->string[ACTION_STRING_CONTENT_TYPE]; 
+   /* Remove header if it isn't the first Content-Type header */
+   if(csp->content_type && (csp->content_type != CT_TABOO))
+   {
+     /*
+      * Another, slightly slower, way to see if
+      * we already parsed another Content-Type header.
+      */
+      assert(NULL != get_header_value(csp->headers, "Content-Type:"));
+
+      log_error(LOG_LEVEL_ERROR,
+         "Multiple Content-Type headers. Removing and ignoring: \'%s\'",
+         *header);
+      freez(*header);
 
-   if (csp->content_type != CT_TABOO)
+      return JB_ERR_OK;
+   }
+
+   if (!(csp->content_type & CT_TABOO))
    {
       if ((strstr(*header, " text/") && !strstr(*header, "plain"))
-       || strstr(*header, "xml")
-       || strstr(*header, "application/x-javascript"))
-         csp->content_type = CT_TEXT;
+        || strstr(*header, "xml")
+        || strstr(*header, "application/x-javascript"))
+      {
+         csp->content_type |= CT_TEXT;
+      }
       else if (strstr(*header, " image/gif"))
-         csp->content_type = CT_GIF;
+      {
+         csp->content_type |= CT_GIF;
+      }
       else if (strstr(*header, " image/jpeg"))
-         csp->content_type = CT_JPEG;
+      {
+         csp->content_type |= CT_JPEG;
+      }
       else
+      {
          csp->content_type = 0;
+      }
    }
    /*
     * Are we enabling text mode by force?
     */
    if (csp->action->flags & ACTION_FORCE_TEXT_MODE)
    {
-     /*
-      * Do we really have to?
-      */
-      if (csp->content_type == CT_TEXT)
+      /*
+       * Do we really have to?
+       */
+      if (csp->content_type & CT_TEXT)
       {
          log_error(LOG_LEVEL_HEADER, "Text mode is already enabled.");   
       }
       else
       {
-         csp->content_type = CT_TEXT;
+         csp->content_type |= CT_TEXT;
          log_error(LOG_LEVEL_HEADER, "Text mode enabled by force. Take cover!");   
       }
    }
@@ -1274,28 +1740,28 @@ jb_err server_content_type(struct client_state *csp, char **header)
     */ 
    if (csp->action->flags & ACTION_CONTENT_TYPE_OVERWRITE)
    { 
-     /*
-      * Make sure the user doesn't accidently
-      * change the content type of binary documents. 
-      */ 
-     if (csp->content_type == CT_TEXT)
-     { 
-        freez(*header);
-        *header = strdup("Content-Type: ");
-        string_append(header, newval);
-        
-        if (header == NULL)
-        { 
-           log_error(LOG_LEVEL_HEADER, "Insufficient memory. Conten-Type crunched without replacement!");
-           return JB_ERR_MEMORY;
-        }
-        log_error(LOG_LEVEL_HEADER, "Modified: %s!", *header);
-     }
-     else
-     {
-        log_error(LOG_LEVEL_HEADER, "%s not replaced. It doesn't look like text. "
-                 "Enable force-text-mode if you know what you're doing.", *header);   
-     }
+      /*
+       * Make sure the user doesn't accidentally
+       * change the content type of binary documents. 
+       */ 
+      if (csp->content_type & CT_TEXT)
+      { 
+         freez(*header);
+         *header = strdup("Content-Type: ");
+         string_append(header, csp->action->string[ACTION_STRING_CONTENT_TYPE]);
+
+         if (header == NULL)
+         { 
+            log_error(LOG_LEVEL_HEADER, "Insufficient memory to replace Content-Type!");
+            return JB_ERR_MEMORY;
+         }
+         log_error(LOG_LEVEL_HEADER, "Modified: %s!", *header);
+      }
+      else
+      {
+         log_error(LOG_LEVEL_HEADER, "%s not replaced. It doesn't look like text. "
+            "Enable force-text-mode if you know what you're doing.", *header);   
+      }
    }  
    return JB_ERR_OK;
 }
@@ -1328,6 +1794,13 @@ jb_err server_transfer_coding(struct client_state *csp, char **header)
     */
    if (strstr(*header, "gzip") || strstr(*header, "compress") || strstr(*header, "deflate"))
    {
+#ifdef FEATURE_ZLIB
+      /*
+       * XXX: Added to test if we could use CT_GZIP and CT_DEFLATE here.
+       */
+      log_error(LOG_LEVEL_INFO, "Marking content type for %s as CT_TABOO because of %s.",
+         csp->http->cmd, *header);
+#endif /* def FEATURE_ZLIB */
       csp->content_type = CT_TABOO;
    }
 
@@ -1340,7 +1813,10 @@ jb_err server_transfer_coding(struct client_state *csp, char **header)
 
       /*
        * If the body was modified, it has been de-chunked first
-       * and the header must be removed. 
+       * and the header must be removed.
+       *
+       * FIXME: If there is more than one transfer encoding,
+       * only the "chunked" part should be removed here.
        */
       if (csp->flags & CSP_FLAG_MODIFIED)
       {
@@ -1357,7 +1833,16 @@ jb_err server_transfer_coding(struct client_state *csp, char **header)
  *
  * Function    :  server_content_encoding
  *
- * Description :  Prohibit filtering (CT_TABOO) if content encoding compresses
+ * Description :  This function is run twice for each request,
+ *                unless FEATURE_ZLIB and filtering are disabled.
+ *
+ *                The first run is used to check if the content
+ *                is compressed, if FEATURE_ZLIB is disabled
+ *                filtering is then disabled as well, if FEATURE_ZLIB
+ *                is enabled the content is marked for decompression.
+ *                
+ *                The second run is used to remove the Content-Encoding
+ *                header if the decompression was successful.
  *
  * Parameters  :
  *          1  :  csp = Current client state (buffers, headers, etc...)
@@ -1372,13 +1857,48 @@ jb_err server_transfer_coding(struct client_state *csp, char **header)
  *********************************************************************/
 jb_err server_content_encoding(struct client_state *csp, char **header)
 {
-   /*
-    * Turn off pcrs and gif filtering if body compressed
-    */
+#ifdef FEATURE_ZLIB
+   if ((csp->flags & CSP_FLAG_MODIFIED)
+    && (csp->content_type & (CT_GZIP | CT_DEFLATE)))
+   {
+      /*
+       * We successfully decompressed the content,
+       * and have to clean the header now, so the
+       * client no longer expects compressed data.
+       *
+       * XXX: There is a difference between cleaning
+       * and removing it completely.
+       */
+      log_error(LOG_LEVEL_HEADER, "Crunching: %s", *header);
+      freez(*header);
+   }
+   else if (strstr(*header, "gzip"))
+   {
+      /* Mark for gzip decompression */
+      csp->content_type |= CT_GZIP;
+   }
+   else if (strstr(*header, "deflate"))
+   {
+      /* Mark for zlib decompression */
+      csp->content_type |= CT_DEFLATE;
+   }
+   else if (strstr(*header, "compress"))
+   {
+      /*
+       * We can't decompress this; therefore we can't filter
+       * it either.
+       */
+      csp->content_type |= CT_TABOO;
+   }
+#else /* !defined(FEATURE_ZLIB) */
    if (strstr(*header, "gzip") || strstr(*header, "compress") || strstr(*header, "deflate"))
    {
-      csp->content_type = CT_TABOO;
+      /*
+       * Body is compressed, turn off pcrs and gif filtering.
+       */
+      csp->content_type |= CT_TABOO;
    }
+#endif /* !defined(FEATURE_ZLIB) */
 
    return JB_ERR_OK;
 
@@ -1405,22 +1925,22 @@ jb_err server_content_encoding(struct client_state *csp, char **header)
  *********************************************************************/
 jb_err server_content_length(struct client_state *csp, char **header)
 {
-   if (csp->content_length != 0) /* Content length could have been modified */
+   const size_t max_header_length = 80;
+
+   /* Regenerate header if the content was modified. */
+   if (csp->flags & CSP_FLAG_MODIFIED)
    {
-      /*
-       * XXX: Shouldn't we check if csp->content_length
-       * is different than the original value?
-       */
       freez(*header);
-      *header = (char *) zalloc(100);
+      *header = (char *) zalloc(max_header_length);
       if (*header == NULL)
       {
          return JB_ERR_MEMORY;
       }
 
-      sprintf(*header, "Content-Length: %d", (int) csp->content_length);
-
-      log_error(LOG_LEVEL_HEADER, "Adjust Content-Length to %d", (int) csp->content_length);
+      snprintf(*header, max_header_length, "Content-Length: %d",
+         (int)csp->content_length);
+      log_error(LOG_LEVEL_HEADER, "Adjusted Content-Length to %d",
+         (int)csp->content_length);
    }
 
    return JB_ERR_OK;
@@ -1611,7 +2131,7 @@ jb_err server_last_modified(struct client_state *csp, char **header)
       }
       else
       {
-         rtime = difftime(now, last_modified);
+         rtime = (long int)difftime(now, last_modified);
          if (rtime)
          {
             rtime = pick_from_range(rtime);
@@ -1913,11 +2433,13 @@ jb_err client_accept_language(struct client_state *csp, char **header)
 
       if (*header == NULL)
       {
-         log_error(LOG_LEVEL_ERROR, " Insufficent memory. Accept-Language header crunched without replacement.");  
+         log_error(LOG_LEVEL_ERROR,
+            "Insufficent memory. Accept-Language header crunched without replacement.");  
       }
       else
       {
-         log_error(LOG_LEVEL_HEADER, "Accept-Language header crunched and replaced with: %s", *header);
+         log_error(LOG_LEVEL_HEADER,
+            "Accept-Language header crunched and replaced with: %s", *header);
       }
    }
    return (*header == NULL) ? JB_ERR_MEMORY : JB_ERR_OK;
@@ -2186,23 +2708,42 @@ jb_err client_x_forwarded(struct client_state *csp, char **header)
  *********************************************************************/
 jb_err client_max_forwards(struct client_state *csp, char **header)
 {
-   unsigned int max_forwards;
+   int max_forwards;
 
-   if ((0 == strcmpic(csp->http->gpc, "trace"))
-      || (0 == strcmpic(csp->http->gpc, "options")))
+   if ((0 == strcmpic(csp->http->gpc, "trace")) ||
+       (0 == strcmpic(csp->http->gpc, "options")))
    {
-      if (1 == sscanf(*header, "Max-Forwards: %u", &max_forwards))
+      assert(*(*header+12) == ':');
+      if (1 == sscanf(*header+12, ": %u", &max_forwards))
       {
-         if (max_forwards-- >= 1)
+         if (max_forwards > 0)
          {
-            sprintf(*header, "Max-Forwards: %u", max_forwards);
-            log_error(LOG_LEVEL_HEADER, "Max forwards of %s request now %d", csp->http->gpc, max_forwards);
+            snprintf(*header, strlen(*header)+1, "Max-Forwards: %u", --max_forwards);
+            log_error(LOG_LEVEL_HEADER, "Max-Forwards value for %s request reduced to %u.",
+               csp->http->gpc, max_forwards);
+         }
+         else if (max_forwards < 0)
+         {
+            log_error(LOG_LEVEL_ERROR, "Crunching invalid header: %s", *header);
+            freez(*header);
          }
          else
          {
-            log_error(LOG_LEVEL_ERROR, "Non-intercepted %s request with Max-Forwards zero!", csp->http->gpc);
+            /*
+             * Not supposed to be reached. direct_response() which
+             * was already called earlier in chat() should have
+             * intercepted the request.
+             */
+            log_error(LOG_LEVEL_ERROR,
+               "Non-intercepted %s request with Max-Forwards zero!", csp->http->gpc);
+            assert(max_forwards != 0);
          }
       }
+      else
+      {
+         log_error(LOG_LEVEL_ERROR, "Crunching invalid header: %s", *header);
+         freez(*header);
+      }
    }
 
    return JB_ERR_OK;
@@ -2460,7 +3001,8 @@ jb_err client_x_filter(struct client_state *csp, char **header)
       {
          if (csp->action->flags & ACTION_FORCE_TEXT_MODE)
          {
-            log_error(LOG_LEVEL_HEADER, "force-text-mode overruled the client's request to fetch without filtering!");
+            log_error(LOG_LEVEL_HEADER,
+               "force-text-mode overruled the client's request to fetch without filtering!");
          }
          else
          {  
@@ -2765,8 +3307,13 @@ jb_err server_http(struct client_state *csp, char **header)
  * Function    :  server_set_cookie
  *
  * Description :  Handle the server "cookie" header properly.
- *                Log cookie to the jar file.  Then "crunch" it,
- *                or accept it.  Called from `sed'.
+ *                Log cookie to the jar file.  Then "crunch",
+ *                accept or rewrite it to a session cookie.
+ *                Called from `sed'.
+ *
+ *                TODO: Allow the user to specify a new expiration
+ *                time to cause the cookie to expire even before the
+ *                browser is closed.
  *
  * Parameters  :
  *          1  :  csp = Current client state (buffers, headers, etc...)
@@ -2781,6 +3328,12 @@ jb_err server_http(struct client_state *csp, char **header)
  *********************************************************************/
 jb_err server_set_cookie(struct client_state *csp, char **header)
 {
+   time_t now;
+   time_t cookie_time; 
+   struct tm tm_now; 
+   struct tm tm_cookie;
+   time(&now);
+
 #ifdef FEATURE_COOKIE_JAR
    if (csp->config->jar)
    {
@@ -2791,9 +3344,7 @@ jb_err server_set_cookie(struct client_state *csp, char **header)
        * the %z field in strftime()
        */
       char tempbuf[ BUFFER_SIZE ];
-      time_t now; 
-      struct tm tm_now; 
-      time (&now); 
+ 
 #ifdef HAVE_LOCALTIME_R
       tm_now = *localtime_r(&now, &tm_now);
 #elif FEATURE_PTHREAD
@@ -2854,22 +3405,122 @@ jb_err server_set_cookie(struct client_state *csp, char **header)
             next_tag = cur_tag + strlen(cur_tag);
          }
 
-         /* Is this the "Expires" tag? */
+         /*
+          * Check the expiration date to see
+          * if the cookie is still valid, if yes,
+          * rewrite it to a session cookie.
+          */
          if (strncmpic(cur_tag, "expires=", 8) == 0)
          {
-            /* Delete the tag by copying the rest of the string over it.
-             * (Note that we cannot just use "strcpy(cur_tag, next_tag)",
-             * since the behaviour of strcpy is undefined for overlapping
-             * strings.)
+            char *match;
+            /*
+             * Try the valid time formats we know about.
+             *
+             * XXX: Maybe the log messages should be removed
+             * for the next stable release. They just exist to
+             * see which time format gets the most hits and
+             * should be checked for first.
              */
-            memmove(cur_tag, next_tag, strlen(next_tag) + 1);
+            if (NULL != (match = strptime(cur_tag, "expires=%a, %e-%b-%y %H:%M:%S ", &tm_cookie)))
+            {
+               log_error(LOG_LEVEL_HEADER,
+                  "cookie \'%s\' send by %s appears to be using time format 1.",
+                  *header, csp->http->url);
+            }
+            else if (NULL != (match = strptime(cur_tag, "expires=%A, %e-%b-%Y %H:%M:%S ", &tm_cookie)))
+            {
+               log_error(LOG_LEVEL_HEADER,
+                  "cookie \'%s\' send by %s appears to be using time format 2.",
+                  *header, csp->http->url);
 
-            /* That changed the header, need to issue a log message */
-            changed = 1;
+            }
+            else if (NULL != (match = strptime(cur_tag, "expires=%a, %e-%b-%Y %H:%M:%S ", &tm_cookie)))
+            {
+               log_error(LOG_LEVEL_HEADER,
+                  "cookie \'%s\' send by %s appears to be using time format 3.",
+                   *header, csp->http->url);
+            }
+
+            /* Did any of them match? */
+            if (NULL == match)
+            {
+               /*
+                * Nope, treat it as if it was still valid.
+                *
+                * XXX: Should we remove the whole cookie instead?
+                */
+               log_error(LOG_LEVEL_ERROR,
+                  "Can't parse %s. Unsupported time format?", cur_tag);
+               memmove(cur_tag, next_tag, strlen(next_tag) + 1);
+               changed = 1;
+            }
+            else
+            {
+               /*
+                * Yes. Check if the cookie is still valid.
+                *
+                * If the cookie is already expired it's probably
+                * a delete cookie and even if it isn't, the browser
+                * will discard it anyway.
+                */
+
+               /*
+                * XXX: timegm() isn't available on some AmigaOS
+                * versions and our replacement doesn't work.
+                *
+                * Our options are to either:
+                *
+                * - disable session-cookies-only completely if timegm
+                *   is missing,
+                *
+                * - to simply remove all expired tags, like it has
+                *   been done until Privoxy 3.0.6 and to live with
+                *    the consequence that it can cause login/logout
+                *   problems on servers that don't validate their
+                *   input properly, or
+                *
+                * - to replace it with mktime in which
+                *   case there is a slight chance of valid cookies
+                *   passing as already expired.
+                *
+                *   This is the way it's currently done and it's not
+                *   as bad as it sounds. If the missing GMT offset is
+                *   enough to change the result of the expiration check
+                *   the cookie will be only valid for a few hours
+                *   anyway, which in many cases will be shorter
+                *   than a browser session.
+                */
+               cookie_time = timegm(&tm_cookie);
+               if (cookie_time - now < 0)
+               {
+                  log_error(LOG_LEVEL_HEADER,
+                     "Cookie \'%s\' is already expired and can pass unmodified.", *header);
+                  /* Just in case some clown sets more than one expiration date */
+                  cur_tag = next_tag;
+               }
+               else
+               {
+                  log_error(LOG_LEVEL_HEADER,
+                     "Cookie \'%s\' is still valid and has to be rewritten.", *header);
+
+                  /*
+                   * Delete the tag by copying the rest of the string over it.
+                   * (Note that we cannot just use "strcpy(cur_tag, next_tag)",
+                   * since the behaviour of strcpy is undefined for overlapping
+                   * strings.)
+                   */
+                  memmove(cur_tag, next_tag, strlen(next_tag) + 1);
+
+                  /* That changed the header, need to issue a log message */
+                  changed = 1;
+
+                  /*
+                   * Note that the next tag has now been moved to *cur_tag,
+                   * so we do not need to update the cur_tag pointer.
+                   */
+               }
+            }
 
-            /* Note that the next tag has now been moved to *cur_tag,
-             * so we do not need to update the cur_tag pointer.
-             */
          }
          else
          {
@@ -2880,7 +3531,9 @@ jb_err server_set_cookie(struct client_state *csp, char **header)
 
       if (changed)
       {
-         log_error(LOG_LEVEL_HEADER, "Changed cookie to a temporary one.");
+         assert(NULL != *header);
+         log_error(LOG_LEVEL_HEADER, "Cookie rewritten to a temporary one: %s",
+            *header);
       }
    }
 
@@ -2905,9 +3558,12 @@ jb_err server_set_cookie(struct client_state *csp, char **header)
  *********************************************************************/
 int strclean(const char *string, const char *substring)
 {
-   int hits = 0, len = strlen(substring);
+   int hits = 0;
+   size_t len;
    char *pos, *p;
 
+   len = strlen(substring);
+
    while((pos = strstr(string, substring)) != NULL)
    {
       p = pos + len;
@@ -2936,7 +3592,7 @@ int strclean(const char *string, const char *substring)
  *          2  :  tm = storage for the resulting time in seconds 
  *
  * Returns     :  Time struct containing the header time, or
- *                NULL in case of a parsing problem.
+ *                NULL in case of parsing problems.
  *
  *********************************************************************/
 struct tm *parse_header_time(char *header, time_t *tm) {
@@ -2945,6 +3601,25 @@ struct tm *parse_header_time(char *header, time_t *tm) {
    struct tm gmt;
    struct tm * timeptr;
 
+   /*
+    * Initializing gmt to prevent time zone offsets.
+    *
+    * While this is only necessary on some platforms
+    * (mingw32 for example), I don't know how to
+    * detect these automatically and doing it everywhere
+    * shouldn't hurt.
+    */
+   time(tm); 
+#ifdef HAVE_LOCALTIME_R
+   gmt = *localtime_r(tm, &gmt);
+#elif FEATURE_PTHREAD
+   pthread_mutex_lock(&localtime_mutex);
+   gmt = *localtime(tm); 
+   pthread_mutex_unlock(&localtime_mutex);
+#else
+   gmt = *localtime(tm); 
+#endif
+
    /* Skipping header name */
    timestring = strstr(header, ": ");
    if (strptime(timestring, ": %a, %d %b %Y %H:%M:%S", &gmt) == NULL)
@@ -2954,7 +3629,7 @@ struct tm *parse_header_time(char *header, time_t *tm) {
    else
    {
       *tm = timegm(&gmt);
-      timeptr=&gmt;
+      timeptr = &gmt;
    }
    return(timeptr);