Improve the handling of chunk-encoded responses
authorFabian Keil <fk@fabiankeil.de>
Mon, 29 Mar 2021 09:44:35 +0000 (11:44 +0200)
committerFabian Keil <fk@fabiankeil.de>
Fri, 6 May 2022 10:39:55 +0000 (12:39 +0200)
... by buffering the data even if filters are disabled and
properly keeping track of where the various chunks are supposed
to start and end.

Previously Privoxy would merely check the last bytes received
to see if they looked like the last-chunk.

This failed to work if the last-chunk wasn't received in one
read and could also result in actual data being misdetected
as last-chunk.

Should fix: SF support request #1739
Reported by: withoutname

filters.c
filters.h
jcc.c

index 5533fcd..baaaea5 100644 (file)
--- a/filters.c
+++ b/filters.c
@@ -2113,6 +2113,172 @@ static filter_function_ptr get_filter_function(const struct client_state *csp)
 }
 
 
+/*********************************************************************
+ *
+ * Function    :  get_bytes_to_next_chunk_start
+ *
+ * Description :  Returns the number of bytes to the start of the
+ *                next chunk in the buffer.
+ *
+ * Parameters  :
+ *          1  :  buffer = Pointer to the text buffer
+ *          2  :  size = Number of bytes in the buffer.
+ *          3  :  offset = Where to expect the beginning of the next chunk.
+ *
+ * Returns     :  -1 if the size can't be determined or data is missing,
+ *                otherwise the number of bytes to the start of the next chunk
+ *                or 0 if the last chunk has been fully buffered.
+ *
+ *********************************************************************/
+static int get_bytes_to_next_chunk_start(char *buffer, size_t size, size_t offset)
+{
+   char *chunk_start;
+   char *p;
+   unsigned int chunk_size = 0;
+   int bytes_to_skip;
+
+   if (size <= offset || size < 5)
+   {
+      /*
+       * Not enough bytes bufferd to figure
+       * out the size of the next chunk.
+       */
+      return -1;
+   }
+
+   chunk_start = buffer + offset;
+
+   p = strstr(chunk_start, "\r\n");
+   if (NULL == p)
+   {
+      /*
+       * The line with the chunk-size hasn't been completely received
+       * yet (or is invalid).
+       */
+      log_error(LOG_LEVEL_RE_FILTER,
+         "Not enough or invalid data in buffer in chunk size line.");
+      return -1;
+   }
+
+   if (sscanf(chunk_start, "%x", &chunk_size) != 1)
+   {
+      /* XXX: Write test case to trigger this. */
+      log_error(LOG_LEVEL_ERROR, "Failed to parse chunk size. "
+         "Size: %lu, offset: %lu. Chunk size start: %N", size, offset,
+         (size - offset), chunk_start);
+      return -1;
+   }
+
+   /*
+    * To get to the start of the next chunk size we have to skip
+    * the line with the current chunk size followed by "\r\n" followd
+    * by the actual data and another "\r\n" following the data.
+    */
+   bytes_to_skip = (int)(p - chunk_start) + 2 + (int)chunk_size + 2;
+
+   if (bytes_to_skip <= 0)
+   {
+      log_error(LOG_LEVEL_ERROR,
+         "Failed to figure out chunk offset. %u and %d seem dubious.",
+         chunk_size, bytes_to_skip);
+      return -1;
+   }
+   if (chunk_size == 0)
+   {
+      if (bytes_to_skip <= (size - offset))
+      {
+         return 0;
+      }
+      else
+      {
+         log_error(LOG_LEVEL_INFO,
+            "Last chunk detected but we're still missing data.");
+         return -1;
+      }
+   }
+
+   return bytes_to_skip;
+}
+
+
+/*********************************************************************
+ *
+ * Function    :  get_bytes_missing_from_chunked_data
+ *
+ * Description :  Figures out how many bytes of data we need to get
+ *                to the start of the next chunk of data (XXX: terminology).
+ *                Due to the nature of chunk-encoded data we can only see
+ *                how many data is missing according to the last chunk size
+ *                buffered.
+ *
+ * Parameters  :
+ *          1  :  buffer = Pointer to the text buffer
+ *          2  :  size = Number of bytes in the buffer.
+ *          3  :  offset = Where to expect the beginning of the next chunk.
+ *
+ * Returns     :  -1 if the data can't be parsed (yet),
+ *                 0 if the buffer is complete or a
+ *                 number of bytes that is missing.
+ *
+ *********************************************************************/
+int get_bytes_missing_from_chunked_data(char *buffer, size_t size, size_t offset)
+{
+   int ret = -1;
+   int last_valid_offset = -1;
+
+   if (size < offset || size < 5)
+   {
+      /* Not enough data buffered yet */
+      return -1;
+   }
+
+   do
+   {
+      ret = get_bytes_to_next_chunk_start(buffer, size, offset);
+      if (ret == -1)
+      {
+         return last_valid_offset;
+      }
+      if (ret == 0)
+      {
+         return 0;
+      }
+      if (offset != 0)
+      {
+         last_valid_offset = (int)offset;
+      }
+      offset += (size_t)ret;
+   } while (offset < size);
+
+   return (int)offset;
+
+}
+
+
+/*********************************************************************
+ *
+ * Function    :  chunked_data_is_complete
+ *
+ * Description :  Detects if a buffer with chunk-encoded data looks
+ *                complete.
+ *
+ * Parameters  :
+ *          1  :  buffer = Pointer to the text buffer
+ *          2  :  size = Number of bytes in the buffer.
+ *          3  :  offset = Where to expect the beginning of the
+ *                         first complete chunk.
+ *
+ * Returns     :  TRUE if it looks like the data is complete,
+ *                FALSE otherwise.
+ *
+ *********************************************************************/
+int chunked_data_is_complete(char *buffer, size_t size, size_t offset)
+{
+   return (0 == get_bytes_missing_from_chunked_data(buffer, size, offset));
+
+}
+
+
 /*********************************************************************
  *
  * Function    :  remove_chunked_transfer_coding
@@ -2350,8 +2516,10 @@ char *execute_content_filters(struct client_state *csp)
    if (JB_ERR_OK != prepare_for_filtering(csp))
    {
       /*
-       * failed to de-chunk or decompress.
+       * We failed to de-chunk or decompress, don't accept
+       * another request on the client connection.
        */
+      csp->flags &= ~CSP_FLAG_CLIENT_CONNECTION_KEEP_ALIVE;
       return NULL;
    }
 
index 11b3e85..54873af 100644 (file)
--- a/filters.h
+++ b/filters.h
@@ -102,6 +102,9 @@ extern int filters_available(const struct client_state *csp);
  */
 extern struct http_response *direct_response(struct client_state *csp);
 
+extern int get_bytes_missing_from_chunked_data(char *buffer, size_t size, size_t offset);
+extern int chunked_data_is_complete(char *buffer, size_t size, size_t offset);
+
 #ifdef FUZZ
 extern char *gif_deanimate_response(struct client_state *csp);
 extern jb_err remove_chunked_transfer_coding(char *buffer, size_t *size);
diff --git a/jcc.c b/jcc.c
index 19b2f49..5b83d17 100644 (file)
--- a/jcc.c
+++ b/jcc.c
@@ -1653,6 +1653,14 @@ extern int fuzz_chunked_transfer_encoding(struct client_state *csp, char *fuzz_i
    {
       log_error(LOG_LEVEL_INFO, "Chunked body is incomplete or invalid");
    }
+   if (get_bytes_missing_from_chunked_data(csp->iob->cur, size, 0) == 0)
+   {
+      if (CHUNK_STATUS_BODY_COMPLETE != status)
+      {
+         log_error(LOG_LEVEL_ERROR,
+            "There's disagreement about whether or not the chunked body is complete.");
+      }
+   }
 
    return (JB_ERR_OK == remove_chunked_transfer_coding(csp->iob->cur, &size));
 
@@ -3051,6 +3059,7 @@ static void handle_established_connection(struct client_state *csp)
    long len = 0; /* for buffer sizes (and negative error codes) */
    int buffer_and_filter_content = 0;
    unsigned int write_delay;
+   size_t chunk_offset = 0;
 #ifdef FEATURE_HTTPS_INSPECTION
    int ret = 0;
    int use_ssl_tunnel = 0;
@@ -3139,23 +3148,6 @@ static void handle_established_connection(struct client_state *csp)
 #endif /* ndef HAVE_POLL */
 
 #ifdef FEATURE_CONNECTION_KEEP_ALIVE
-      if ((csp->flags & CSP_FLAG_CHUNKED)
-         && !(csp->flags & CSP_FLAG_CONTENT_LENGTH_SET)
-         && ((csp->iob->eod - csp->iob->cur) >= 5)
-         && !memcmp(csp->iob->eod-5, "0\r\n\r\n", 5))
-      {
-         /*
-          * XXX: This check should be obsolete now,
-          *      but let's wait a while to be sure.
-          */
-         log_error(LOG_LEVEL_CONNECT,
-            "Looks like we got the last chunk together with "
-            "the server headers but didn't detect it earlier. "
-            "We better stop reading.");
-         byte_count = (unsigned long long)(csp->iob->eod - csp->iob->cur);
-         csp->expected_content_length = byte_count;
-         csp->flags |= CSP_FLAG_CONTENT_LENGTH_SET;
-      }
       if (server_body && server_response_is_complete(csp, byte_count))
       {
          if (csp->expected_content_length == byte_count)
@@ -3489,18 +3481,6 @@ static void handle_established_connection(struct client_state *csp)
          }
 
 #ifdef FEATURE_CONNECTION_KEEP_ALIVE
-         if (csp->flags & CSP_FLAG_CHUNKED)
-         {
-            if ((len >= 5) && !memcmp(csp->receive_buffer+len-5, "0\r\n\r\n", 5))
-            {
-               /* XXX: this is a temporary hack */
-               log_error(LOG_LEVEL_CONNECT,
-                  "Looks like we reached the end of the last chunk. "
-                  "We better stop reading.");
-               csp->expected_content_length = byte_count + (unsigned long long)len;
-               csp->flags |= CSP_FLAG_CONTENT_LENGTH_SET;
-            }
-         }
          reading_done:
 #endif  /* FEATURE_CONNECTION_KEEP_ALIVE */
 
@@ -3743,6 +3723,27 @@ static void handle_established_connection(struct client_state *csp)
                    */
                   byte_count = (unsigned long long)flushed;
                   freez(hdr);
+                  if ((csp->flags & CSP_FLAG_CHUNKED) && (chunk_offset != 0))
+                  {
+                     log_error(LOG_LEVEL_CONNECT,
+                        "Reducing chunk offset %lu by %ld to %lu.", chunk_offset, flushed,
+                        (chunk_offset - (unsigned)flushed));
+                     assert(chunk_offset >= flushed); /* XXX: Reachable with malicious input? */
+                     chunk_offset -= (unsigned)flushed;
+
+                     /* Make room in the iob. */
+                     csp->iob->cur = csp->iob->eod = csp->iob->buf;
+
+                     if (add_to_iob(csp->iob, csp->config->buffer_limit,
+                           csp->receive_buffer, len))
+                     {
+                        /* This is not supposed to happen but ... */
+                        csp->flags &= ~CSP_FLAG_CLIENT_CONNECTION_KEEP_ALIVE;
+                        log_error(LOG_LEVEL_ERROR, "Failed to buffer %ld bytes of "
+                           "chunk-encoded data after resetting the buffer.", len);
+                        return;
+                     }
+                  }
                   buffer_and_filter_content = 0;
                   server_body = 1;
                }
@@ -3778,8 +3779,66 @@ static void handle_established_connection(struct client_state *csp)
                      return;
                   }
                }
+               if (csp->flags & CSP_FLAG_CHUNKED)
+               {
+                  /*
+                   * While we don't need the data to filter it, put it in the
+                   * buffer so we can keep track of the offset to the start of
+                   * the next chunk and detect when the response is finished.
+                   */
+                  size_t encoded_bytes = (size_t)(csp->iob->eod - csp->iob->cur);
+
+                  if (csp->config->buffer_limit / 4 < encoded_bytes)
+                  {
+                     /*
+                      * Reset the buffer to reduce the memory footprint.
+                      */
+                     log_error(LOG_LEVEL_CONNECT,
+                        "Reducing the chunk offset from %lu to %lu after "
+                        "discarding %lu bytes to make room in the buffer.",
+                        chunk_offset, (chunk_offset - encoded_bytes),
+                        encoded_bytes);
+                     chunk_offset -= encoded_bytes;
+                     csp->iob->cur = csp->iob->eod = csp->iob->buf;
+                  }
+                  if (add_to_iob(csp->iob, csp->config->buffer_limit,
+                     csp->receive_buffer, len))
+                  {
+                     /* This is not supposed to happen but ... */
+                     csp->flags &= ~CSP_FLAG_CLIENT_CONNECTION_KEEP_ALIVE;
+                     log_error(LOG_LEVEL_ERROR,
+                        "Failed to buffer %ld bytes of chunk-encoded data.",
+                        len);
+                     return;
+                  }
+               }
             }
             byte_count += (unsigned long long)len;
+
+            if (csp->flags & CSP_FLAG_CHUNKED)
+            {
+               int rc;
+               size_t encoded_bytes = (size_t)(csp->iob->eod - csp->iob->cur);
+
+               rc = get_bytes_missing_from_chunked_data(csp->iob->cur, encoded_bytes,
+                  chunk_offset);
+               if (rc >= 0)
+               {
+                  if (rc != 0)
+                  {
+                     chunk_offset = (size_t)rc;
+                  }
+
+                  if (chunked_data_is_complete(csp->iob->cur, encoded_bytes, chunk_offset))
+                  {
+                     log_error(LOG_LEVEL_CONNECT,
+                        "We buffered the last chunk of the response.");
+                     csp->expected_content_length = byte_count;
+                     csp->flags |= CSP_FLAG_CONTENT_LENGTH_SET;
+                  }
+               }
+            }
+
             continue;
          }
          else
@@ -3986,18 +4045,30 @@ static void handle_established_connection(struct client_state *csp)
             }
 
             if ((csp->flags & CSP_FLAG_CHUNKED)
-               && !(csp->flags & CSP_FLAG_CONTENT_LENGTH_SET)
-               && ((csp->iob->eod - csp->iob->cur) >= 5)
-               && !memcmp(csp->iob->eod-5, "0\r\n\r\n", 5))
+               && !(csp->flags & CSP_FLAG_CONTENT_LENGTH_SET))
             {
-               log_error(LOG_LEVEL_CONNECT,
-                  "Looks like we got the last chunk together with "
-                  "the server headers. We better stop reading.");
-               byte_count = (unsigned long long)(csp->iob->eod - csp->iob->cur);
-               csp->expected_content_length = byte_count;
-               csp->flags |= CSP_FLAG_CONTENT_LENGTH_SET;
-            }
+               int rc;
+               size_t encoded_size = (size_t)(csp->iob->eod - csp->iob->cur);
 
+               rc = get_bytes_missing_from_chunked_data(csp->iob->cur, encoded_size,
+                  chunk_offset);
+               if (rc >= 0)
+               {
+                  if (rc != 0)
+                  {
+                     chunk_offset = (size_t)rc;
+                  }
+                  if (chunked_data_is_complete(csp->iob->cur, encoded_size, chunk_offset))
+                  {
+                     log_error(LOG_LEVEL_CONNECT,
+                        "Looks like we got the last chunk together with "
+                        "the server headers. We better stop reading.");
+                     byte_count = (unsigned long long)(csp->iob->eod - csp->iob->cur);
+                     csp->expected_content_length = byte_count;
+                     csp->flags |= CSP_FLAG_CONTENT_LENGTH_SET;
+                  }
+               }
+            }
             csp->server_connection.response_received = time(NULL);
 
             if (crunch_response_triggered(csp, crunchers_light))
@@ -4066,6 +4137,32 @@ static void handle_established_connection(struct client_state *csp)
                      mark_server_socket_tainted(csp);
                      return;
                   }
+               }
+               if (csp->flags & CSP_FLAG_CHUNKED &&
+                 !(csp->flags & CSP_FLAG_CONTENT_LENGTH_SET))
+               {
+                  /*
+                   * In case of valid data we shouldn't flush more
+                   * data than chunk_offset but the data may be invalid.
+                   */
+                  if (chunk_offset >= len)
+                  {
+                     log_error(LOG_LEVEL_CONNECT,
+                        "Reducing chunk offset from %lu to %lu after flushing %ld bytes",
+                        chunk_offset, (chunk_offset - (unsigned)len), len);
+                     chunk_offset = chunk_offset - (unsigned)len;
+                  }
+                  else
+                  {
+                     log_error(LOG_LEVEL_CONNECT,
+                        "Keeping chunk offset at %lu despite flushing %ld bytes",
+                        chunk_offset, len);
+                     /*
+                      * If we can't parse the chunk-encoded data we should
+                      * not reuse the server connection.
+                      */
+                     mark_server_socket_tainted(csp);
+                  }
                }
                                }