Fixes Bugs item #2816541 - http download stops at 0.3GB (mingw32)
[privoxy.git] / parsers.c
index c03a090..bb45a7b 100644 (file)
--- a/parsers.c
+++ b/parsers.c
@@ -1,4 +1,4 @@
-const char parsers_rcs[] = "$Id: parsers.c,v 1.164 2009/05/25 15:42:40 fabiankeil Exp $";
+const char parsers_rcs[] = "$Id: parsers.c,v 1.189 2009/07/05 12:04:46 fabiankeil Exp $";
 /*********************************************************************
  *
  * File        :  $Source: /cvsroot/ijbswa/current/parsers.c,v $
@@ -67,6 +67,15 @@ const char parsers_rcs[] = "$Id: parsers.c,v 1.164 2009/05/25 15:42:40 fabiankei
 
 #ifdef FEATURE_ZLIB
 #include <zlib.h>
+
+#define GZIP_IDENTIFIER_1       0x1f
+#define GZIP_IDENTIFIER_2       0x8b
+
+#define GZIP_FLAG_CHECKSUM      0x02
+#define GZIP_FLAG_EXTRA_FIELDS  0x04
+#define GZIP_FLAG_FILE_NAME     0x08
+#define GZIP_FLAG_COMMENT       0x10
+#define GZIP_FLAG_RESERVED_BITS 0xe0
 #endif
 
 #if !defined(_WIN32) && !defined(__OS2__)
@@ -147,6 +156,7 @@ static jb_err server_content_disposition(struct client_state *csp, char **header
 #ifdef FEATURE_CONNECTION_KEEP_ALIVE
 static jb_err server_save_content_length(struct client_state *csp, char **header);
 static jb_err server_keep_alive(struct client_state *csp, char **header);
+static jb_err client_keep_alive(struct client_state *csp, char **header);
 #endif /* def FEATURE_CONNECTION_KEEP_ALIVE */
 
 static jb_err client_host_adder       (struct client_state *csp);
@@ -163,6 +173,8 @@ static jb_err create_fake_referrer(char **header, const char *fake_referrer);
 static jb_err handle_conditional_hide_referrer_parameter(char **header,
    const char *host, const int parameter_conditional_block);
 static const char *get_appropiate_connection_header(const struct client_state *csp);
+static void create_content_length_header(unsigned long long content_length,
+                                         char *header, size_t buffer_length);
 
 /*
  * List of functions to run on a list of headers.
@@ -190,7 +202,9 @@ static const struct parsers client_patterns[] = {
    { "TE:",                       3,   client_te },
    { "Host:",                     5,   client_host },
    { "if-modified-since:",       18,   client_if_modified_since },
-#ifndef FEATURE_CONNECTION_KEEP_ALIVE
+#ifdef FEATURE_CONNECTION_KEEP_ALIVE
+   { "Keep-Alive:",              11,   client_keep_alive },
+#else
    { "Keep-Alive:",              11,   crumble },
 #endif
    { "connection:",              11,   client_connection },
@@ -423,8 +437,8 @@ jb_err decompress_iob(struct client_state *csp)
        * Strip off the gzip header. Please see RFC 1952 for more
        * explanation of the appropriate fields.
        */
-      if ((*cur++ != (char)0x1f)
-       || (*cur++ != (char)0x8b)
+      if (((*cur++ & 0xff) != GZIP_IDENTIFIER_1)
+       || ((*cur++ & 0xff) != GZIP_IDENTIFIER_2)
        || (*cur++ != Z_DEFLATED))
       {
          log_error(LOG_LEVEL_ERROR, "Invalid gzip header when decompressing");
@@ -433,48 +447,32 @@ jb_err decompress_iob(struct client_state *csp)
       else
       {
          int flags = *cur++;
-         /*
-          * XXX: These magic numbers should be replaced
-          * with macros to give a better idea what they do.
-          */
-         if (flags & 0xe0)
+         if (flags & GZIP_FLAG_RESERVED_BITS)
          {
             /* The gzip header has reserved bits set; bail out. */
             log_error(LOG_LEVEL_ERROR, "Invalid gzip header flags when decompressing");
             return JB_ERR_COMPRESS;
          }
+
+         /*
+          * Skip mtime (4 bytes), extra flags (1 byte)
+          * and OS type (1 byte).
+          */
          cur += 6;
 
          /* Skip extra fields if necessary. */
-         if (flags & 0x04)
+         if (flags & GZIP_FLAG_EXTRA_FIELDS)
          {
             /*
              * Skip a given number of bytes, specified
              * as a 16-bit little-endian value.
-             */
-            /*
-             * XXX: This code used to be:
-             * 
-             * csp->iob->cur += *csp->iob->cur++ + (*csp->iob->cur++ << 8);
-             *
-             * which I had to change into:
              *
-             * cur += *cur++ + (*cur++ << 8);
-             *
-             * at which point gcc43 finally noticed that the value
-             * of cur is undefined (it depends on which of the
-             * summands is evaluated first).
-             *
-             * I haven't come across a site where this
-             * code is actually executed yet, but I hope
-             * it works anyway.
+             * XXX: this code is untested and should probably be removed.
              */
             int skip_bytes;
             skip_bytes = *cur++;
             skip_bytes += *cur++ << 8;
 
-            assert(skip_bytes == *csp->iob->cur - 2 + ((*csp->iob->cur - 1) << 8));
-
             /*
              * The number of bytes to skip should be positive
              * and we'd like to stay in the buffer.
@@ -493,22 +491,21 @@ jb_err decompress_iob(struct client_state *csp)
          }
 
          /* Skip the filename if necessary. */
-         if (flags & 0x08)
+         if (flags & GZIP_FLAG_FILE_NAME)
          {
             /* A null-terminated string is supposed to follow. */
             while (*cur++ && (cur < csp->iob->eod));
-
          }
 
          /* Skip the comment if necessary. */
-         if (flags & 0x10)
+         if (flags & GZIP_FLAG_COMMENT)
          {
             /* A null-terminated string is supposed to follow. */
             while (*cur++ && (cur < csp->iob->eod));
          }
 
          /* Skip the CRC if necessary. */
-         if (flags & 0x02)
+         if (flags & GZIP_FLAG_CHECKSUM)
          {
             cur += 2;
          }
@@ -568,7 +565,7 @@ jb_err decompress_iob(struct client_state *csp)
     * Passing -MAX_WBITS to inflateInit2 tells the library
     * that there is no zlib header.
     */
-   if (inflateInit2 (&zstr, -MAX_WBITS) != Z_OK)
+   if (inflateInit2(&zstr, -MAX_WBITS) != Z_OK)
    {
       log_error(LOG_LEVEL_ERROR, "Error initializing decompression");
       return JB_ERR_COMPRESS;
@@ -599,14 +596,15 @@ jb_err decompress_iob(struct client_state *csp)
       char *tmpbuf;                /* used for realloc'ing the buffer */
       size_t oldbufsize = bufsize; /* keep track of the old bufsize */
 
-      /*
-       * If zlib wants more data then there's a problem, because
-       * the complete compressed file should have been buffered.
-       */
       if (0 == zstr.avail_in)
       {
-         log_error(LOG_LEVEL_ERROR, "Unexpected end of compressed iob");
-         return JB_ERR_COMPRESS;
+         /*
+          * If zlib wants more data then there's a problem, because
+          * the complete compressed file should have been buffered.
+          */
+         log_error(LOG_LEVEL_ERROR,
+            "Unexpected end of compressed iob. Using what we got so far.");
+         break;
       }
 
       /*
@@ -676,11 +674,15 @@ jb_err decompress_iob(struct client_state *csp)
        */
    }
 
-   if (status != Z_STREAM_END)
+   if ((status != Z_STREAM_END) && (0 != zstr.avail_in))
    {
-      /* We failed to decompress the stream. */
+      /*
+       * We failed to decompress the stream and it's
+       * not simply because of missing data.
+       */
       log_error(LOG_LEVEL_ERROR,
-         "Error in decompressing to the buffer (iob): %s", zstr.msg);
+         "Unexpected error while decompressing to the buffer (iob): %s",
+         zstr.msg);
       return JB_ERR_COMPRESS;
    }
 
@@ -1154,6 +1156,25 @@ jb_err update_server_headers(struct client_state *csp)
       }
    }
 
+#ifdef FEATURE_CONNECTION_KEEP_ALIVE
+   if ((JB_ERR_OK == err)
+    && (csp->flags & CSP_FLAG_MODIFIED)
+    && (csp->flags & CSP_FLAG_CLIENT_CONNECTION_KEEP_ALIVE)
+    && !(csp->flags & CSP_FLAG_SERVER_CONTENT_LENGTH_SET))
+   {
+      char header[50];
+
+      create_content_length_header(csp->content_length, header, sizeof(header));
+      err = enlist(csp->headers, header);
+      if (JB_ERR_OK == err)
+      {
+         log_error(LOG_LEVEL_HEADER,
+            "Content modified with no Content-Length header set. "
+            "Created: %s.", header);
+      }
+   }
+#endif /* def FEATURE_CONNECTION_KEEP_ALIVE */
+
    return err;
 }
 
@@ -1562,7 +1583,11 @@ static jb_err filter_header(struct client_state *csp, char **header)
  *********************************************************************/
 static jb_err server_connection(struct client_state *csp, char **header)
 {
-   if (!strcmpic(*header, "Connection: keep-alive"))
+   if (!strcmpic(*header, "Connection: keep-alive")
+#ifdef FEATURE_CONNECTION_KEEP_ALIVE
+    && !(csp->flags & CSP_FLAG_SERVER_SOCKET_TAINTED)
+#endif
+      )
    {
 #ifdef FEATURE_CONNECTION_KEEP_ALIVE
       if ((csp->config->feature_flags & RUNTIME_FEATURE_CONNECTION_KEEP_ALIVE))
@@ -1602,7 +1627,7 @@ static jb_err server_connection(struct client_state *csp, char **header)
  *
  * Function    :  server_keep_alive
  *
- * Description :  Stores the servers keep alive timeout.
+ * Description :  Stores the server's keep alive timeout.
  *
  * Parameters  :
  *          1  :  csp = Current client state (buffers, headers, etc...)
@@ -1644,6 +1669,54 @@ static jb_err server_keep_alive(struct client_state *csp, char **header)
 
    return JB_ERR_OK;
 }
+
+
+/*********************************************************************
+ *
+ * Function    :  client_keep_alive
+ *
+ * Description :  Stores the client's keep alive timeout.
+ *
+ * Parameters  :
+ *          1  :  csp = Current client state (buffers, headers, etc...)
+ *          2  :  header = On input, pointer to header to modify.
+ *                On output, pointer to the modified header, or NULL
+ *                to remove the header.  This function frees the
+ *                original string if necessary.
+ *
+ * Returns     :  JB_ERR_OK.
+ *
+ *********************************************************************/
+static jb_err client_keep_alive(struct client_state *csp, char **header)
+{
+   unsigned int keep_alive_timeout;
+   const char *timeout_position = strstr(*header, ": ");
+
+   if ((NULL == timeout_position)
+    || (1 != sscanf(timeout_position, ": %u", &keep_alive_timeout)))
+   {
+      log_error(LOG_LEVEL_ERROR, "Couldn't parse: %s", *header);
+   }
+   else
+   {
+      if (keep_alive_timeout < csp->config->keep_alive_timeout)
+      {
+         log_error(LOG_LEVEL_HEADER,
+            "Reducing keep-alive timeout from %u to %u.",
+            csp->config->keep_alive_timeout, keep_alive_timeout);
+         csp->server_connection.keep_alive_timeout = keep_alive_timeout;
+      }
+      else
+      {
+         /* XXX: Is this log worthy? */
+         log_error(LOG_LEVEL_HEADER,
+            "Client keep-alive timeout is %u. Sticking with %u.",
+            keep_alive_timeout, csp->server_connection.keep_alive_timeout);
+      }
+   }
+
+   return JB_ERR_OK;
+}
 #endif /* def FEATURE_CONNECTION_KEEP_ALIVE */
 
 
@@ -2047,22 +2120,19 @@ static jb_err server_content_encoding(struct client_state *csp, char **header)
  *********************************************************************/
 static jb_err server_adjust_content_length(struct client_state *csp, char **header)
 {
-   const size_t max_header_length = 80;
-
    /* Regenerate header if the content was modified. */
    if (csp->flags & CSP_FLAG_MODIFIED)
    {
+      const size_t header_length = 50;
       freez(*header);
-      *header = (char *) zalloc(max_header_length);
+      *header = malloc(header_length);
       if (*header == NULL)
       {
          return JB_ERR_MEMORY;
       }
-
-      snprintf(*header, max_header_length, "Content-Length: %d",
-         (int)csp->content_length);
-      log_error(LOG_LEVEL_HEADER, "Adjusted Content-Length to %d",
-         (int)csp->content_length);
+      create_content_length_header(csp->content_length, *header, header_length);
+      log_error(LOG_LEVEL_HEADER,
+         "Adjusted Content-Length to %llu", csp->content_length);
    }
 
    return JB_ERR_OK;
@@ -2093,7 +2163,11 @@ static jb_err server_save_content_length(struct client_state *csp, char **header
 
    assert(*(*header+14) == ':');
 
+#ifdef _WIN32
+   if (1 != sscanf(*header+14, ": %I64u", &content_length))
+#else
    if (1 != sscanf(*header+14, ": %llu", &content_length))
+#endif
    {
       log_error(LOG_LEVEL_ERROR, "Crunching invalid header: %s", *header);
       freez(*header);
@@ -2101,6 +2175,7 @@ static jb_err server_save_content_length(struct client_state *csp, char **header
    else
    {
       csp->expected_content_length = content_length;
+      csp->flags |= CSP_FLAG_SERVER_CONTENT_LENGTH_SET;
       csp->flags |= CSP_FLAG_CONTENT_LENGTH_SET;
    }
 
@@ -2230,7 +2305,6 @@ static jb_err server_last_modified(struct client_state *csp, char **header)
 #endif
    struct tm *timeptr = NULL;
    time_t now, last_modified;                  
-   long int rtime;
    long int days, hours, minutes, seconds;
    
    /*
@@ -2280,7 +2354,7 @@ static jb_err server_last_modified(struct client_state *csp, char **header)
       now = time(NULL);
 #ifdef HAVE_GMTIME_R
       gmtime_r(&now, &gmt);
-#elif def MUTEX_LOCKS_AVAILABLE
+#elif defined(MUTEX_LOCKS_AVAILABLE)
       privoxy_mutex_lock(&gmtime_mutex);
       gmtime(&now);
       privoxy_mutex_unlock(&gmtime_mutex);
@@ -2294,30 +2368,41 @@ static jb_err server_last_modified(struct client_state *csp, char **header)
       }
       else
       {
-         rtime = (long int)difftime(now, last_modified);
+         long int rtime = (long int)difftime(now, last_modified);
          if (rtime)
          {
-            int negative = 0;
+            const int negative_delta = (rtime < 0);
 
-            if (rtime < 0)
+            if (negative_delta)
             {
                rtime *= -1; 
-               negative = 1;
                log_error(LOG_LEVEL_HEADER, "Server time in the future.");
             }
             rtime = pick_from_range(rtime);
-            if (negative) rtime *= -1;
+            if (negative_delta)
+            {
+               rtime *= -1;
+            }
             last_modified += rtime;
 #ifdef HAVE_GMTIME_R
             timeptr = gmtime_r(&last_modified, &gmt);
-#elif def MUTEX_LOCKS_AVAILABLE
+#elif defined(MUTEX_LOCKS_AVAILABLE)
             privoxy_mutex_lock(&gmtime_mutex);
             timeptr = gmtime(&last_modified);
             privoxy_mutex_unlock(&gmtime_mutex);
 #else
             timeptr = gmtime(&last_modified);
 #endif
-            strftime(newheader, sizeof(newheader), "%a, %d %b %Y %H:%M:%S GMT", timeptr);
+            if ((NULL == timeptr) || !strftime(newheader,
+                  sizeof(newheader), "%a, %d %b %Y %H:%M:%S GMT", timeptr))
+            {
+               log_error(LOG_LEVEL_ERROR,
+                  "Randomizing '%s' failed. Crunching the header without replacement.",
+                  *header);
+               freez(*header);
+               return JB_ERR_OK;
+            }
+
             freez(*header);
             *header = strdup("Last-Modified: ");
             string_append(header, newheader);
@@ -2972,9 +3057,7 @@ static jb_err client_if_modified_since(struct client_state *csp, char **header)
    struct tm *timeptr = NULL;
    time_t tm = 0;                  
    const char *newval;
-   long int rtime;
    long int hours, minutes, seconds;
-   int negative = 0;
    char * endptr;
    
    if ( 0 == strcmpic(*header, "If-Modified-Since: Wed, 08 Jun 1955 12:00:00 GMT"))
@@ -3009,15 +3092,16 @@ static jb_err client_if_modified_since(struct client_state *csp, char **header)
          }
          else
          {
-            rtime = strtol(newval, &endptr, 0);
+            long int rtime = strtol(newval, &endptr, 0);
+            const int negative_range = (rtime < 0);
+
             if (rtime)
             {
                log_error(LOG_LEVEL_HEADER, "Randomizing: %s (random range: %d minut%s)",
                   *header, rtime, (rtime == 1 || rtime == -1) ? "e": "es");
-               if (rtime < 0)
+               if (negative_range)
                {
                   rtime *= -1; 
-                  negative = 1;
                }
                rtime *= 60;
                rtime = pick_from_range(rtime);
@@ -3027,17 +3111,25 @@ static jb_err client_if_modified_since(struct client_state *csp, char **header)
                log_error(LOG_LEVEL_ERROR, "Random range is 0. Assuming time transformation test.",
                   *header);
             }
-            tm += rtime * (negative ? -1 : 1);
+            tm += rtime * (negative_range ? -1 : 1);
 #ifdef HAVE_GMTIME_R
             timeptr = gmtime_r(&tm, &gmt);
-#elif def MUTEX_LOCKS_AVAILABLE
+#elif defined(MUTEX_LOCKS_AVAILABLE)
             privoxy_mutex_lock(&gmtime_mutex);
             timeptr = gmtime(&tm);
             privoxy_mutex_unlock(&gmtime_mutex);
 #else
             timeptr = gmtime(&tm);
 #endif
-            strftime(newheader, sizeof(newheader), "%a, %d %b %Y %H:%M:%S GMT", timeptr);
+            if ((NULL == timeptr) || !strftime(newheader,
+                  sizeof(newheader), "%a, %d %b %Y %H:%M:%S GMT", timeptr))
+            {
+               log_error(LOG_LEVEL_ERROR,
+                  "Randomizing '%s' failed. Crunching the header without replacement.",
+                  *header);
+               freez(*header);
+               return JB_ERR_OK;
+            }
 
             freez(*header);
             *header = strdup("If-Modified-Since: ");
@@ -3055,7 +3147,7 @@ static jb_err client_if_modified_since(struct client_state *csp, char **header)
 
             log_error(LOG_LEVEL_HEADER,
                "Randomized:  %s (%s %d hou%s %d minut%s %d second%s",
-               *header, (negative) ? "subtracted" : "added", hours,
+               *header, (negative_range) ? "subtracted" : "added", hours,
                (hours == 1) ? "r" : "rs", minutes, (minutes == 1) ? "e" : "es",
                seconds, (seconds == 1) ? ")" : "s)");
          }
@@ -3370,8 +3462,21 @@ static jb_err server_connection_adder(struct client_state *csp)
    if ((csp->config->feature_flags &
         RUNTIME_FEATURE_CONNECTION_KEEP_ALIVE)
     && (NULL != response_status_line)
-    && !strncmpic(response_status_line, "HTTP/1.1", 8))
+    && !strncmpic(response_status_line, "HTTP/1.1", 8)
+#ifdef FEATURE_CONNECTION_KEEP_ALIVE
+    && !(csp->flags & CSP_FLAG_SERVER_SOCKET_TAINTED)
+#endif
+    && (csp->http->status == 200)
+       )
    {
+      /*
+       * XXX: not doing this for status codes other than 200 works
+       * around problems with broken servers that will keep the
+       * connection open, but terminate the connection when the
+       * next request arrives. Once we are able to figure out which
+       * requests are safe to send again, this will probably no
+       * longer be necessary.
+       */
       log_error(LOG_LEVEL_HEADER, "A HTTP/1.1 response "
          "without Connection header implies keep-alive.");
       csp->flags |= CSP_FLAG_SERVER_CONNECTION_KEEP_ALIVE;
@@ -3389,7 +3494,8 @@ static jb_err server_connection_adder(struct client_state *csp)
  * Function    :  server_proxy_connection_adder
  *
  * Description :  Adds a "Proxy-Connection: keep-alive" header to
- *                csp->headers. XXX: We should reuse existant ones.
+ *                csp->headers if the client asked for keep-alive.
+ *                XXX: We should reuse existing ones.
  *
  * Parameters  :
  *          1  :  csp = Current client state (buffers, headers, etc...)
@@ -3401,8 +3507,16 @@ static jb_err server_connection_adder(struct client_state *csp)
 static jb_err server_proxy_connection_adder(struct client_state *csp)
 {
    static const char proxy_connection_header[] = "Proxy-Connection: keep-alive";
-   log_error(LOG_LEVEL_HEADER, "Adding: %s", proxy_connection_header);
-   return enlist(csp->headers, proxy_connection_header);
+   jb_err err = JB_ERR_OK;
+
+   if ((csp->flags & CSP_FLAG_CLIENT_CONNECTION_KEEP_ALIVE)
+    && !(csp->flags & CSP_FLAG_SERVER_SOCKET_TAINTED))
+   {
+      log_error(LOG_LEVEL_HEADER, "Adding: %s", proxy_connection_header);
+      err = enlist(csp->headers, proxy_connection_header);
+   }
+
+   return err;
 }
 #endif /* FEATURE_CONNECTION_KEEP_ALIVE */
 
@@ -3431,11 +3545,14 @@ static jb_err client_connection_header_adder(struct client_state *csp)
       return JB_ERR_OK;
    }
 
+#ifdef FEATURE_CONNECTION_KEEP_ALIVE
    if ((csp->config->feature_flags & RUNTIME_FEATURE_CONNECTION_KEEP_ALIVE)
-      && (csp->http->ssl == 0))
+      && (csp->http->ssl == 0)
+      && !strcmpic(csp->http->ver, "HTTP/1.1"))
    {
       csp->flags |= CSP_FLAG_CLIENT_CONNECTION_KEEP_ALIVE;
    }
+#endif /* FEATURE_CONNECTION_KEEP_ALIVE */
 
    log_error(LOG_LEVEL_HEADER, "Adding: %s", wanted_header);
 
@@ -4004,12 +4121,38 @@ static const char *get_appropiate_connection_header(const struct client_state *c
    static const char connection_close[] = "Connection: close";
 
    if ((csp->config->feature_flags & RUNTIME_FEATURE_CONNECTION_KEEP_ALIVE)
+#ifdef FEATURE_CONNECTION_KEEP_ALIVE
+    && !(csp->flags & CSP_FLAG_SERVER_SOCKET_TAINTED)
+#endif
     && (csp->http->ssl == 0))
    {
       return connection_keep_alive;
    }
    return connection_close;
 }
+
+
+/*********************************************************************
+ *
+ * Function    :  create_content_length_header
+ *
+ * Description :  Creates a Content-Length header.
+ *
+ * Parameters  :
+ *          1  :  content_length = The content length to be used in the header.
+ *          2  :  header = Allocated space to save the header.
+ *          3  :  buffer_length = The length of the allocated space.
+ *
+ * Returns     :  void
+ *
+ *********************************************************************/
+static void create_content_length_header(unsigned long long content_length,
+                                         char *header, size_t buffer_length)
+{
+   snprintf(header, buffer_length, "Content-Length: %llu", content_length);
+}
+
+
 /*
   Local Variables:
   tab-width: 3