Add a client-body-tagger action
[privoxy.git] / jcc.c
diff --git a/jcc.c b/jcc.c
index 54b0b2d..19b2f49 100644 (file)
--- a/jcc.c
+++ b/jcc.c
@@ -5,7 +5,7 @@
  * Purpose     :  Main file.  Contains main() method, main loop, and
  *                the main connection-handling function.
  *
- * Copyright   :  Written by and Copyright (C) 2001-2021 the
+ * Copyright   :  Written by and Copyright (C) 2001-2022 the
  *                Privoxy team. https://www.privoxy.org/
  *
  *                Based on the Internet Junkbuster originally written
@@ -127,6 +127,10 @@ struct file_list     files[1];
 #ifdef FEATURE_STATISTICS
 int urls_read     = 0;     /* total nr of urls read inc rejected */
 int urls_rejected = 0;     /* total nr of urls rejected */
+#ifdef MUTEX_LOCKS_AVAILABLE
+unsigned long long number_of_requests_received = 0;
+unsigned long long number_of_requests_blocked = 0;
+#endif
 #endif /* def FEATURE_STATISTICS */
 
 #ifdef FEATURE_GRACEFUL_TERMINATION
@@ -191,9 +195,12 @@ privoxy_mutex_t external_filter_mutex;
 #ifdef FEATURE_CLIENT_TAGS
 privoxy_mutex_t client_tags_mutex;
 #endif
+#ifdef FEATURE_STATISTICS
+privoxy_mutex_t block_statistics_mutex;
+#endif
 #ifdef FEATURE_EXTENDED_STATISTICS
 privoxy_mutex_t filter_statistics_mutex;
-privoxy_mutex_t block_statistics_mutex;
+privoxy_mutex_t block_reason_statistics_mutex;
 #endif
 
 #if !defined(HAVE_GETHOSTBYADDR_R) || !defined(HAVE_GETHOSTBYNAME_R)
@@ -281,6 +288,13 @@ static const char CLIENT_BODY_PARSE_ERROR_RESPONSE[] =
    "Connection: close\r\n\r\n"
    "Failed parsing or buffering the chunk-encoded client body.\n";
 
+static const char CLIENT_BODY_BUFFER_ERROR_RESPONSE[] =
+   "HTTP/1.1 400 Failed reading client body\r\n"
+   "Content-Type: text/plain\r\n"
+   "Connection: close\r\n\r\n"
+   "Failed to buffer the client body to apply content filters.\n"
+   "Could be caused by a socket timeout\n";
+
 static const char UNSUPPORTED_CLIENT_EXPECTATION_ERROR_RESPONSE[] =
    "HTTP/1.1 417 Expecting too much\r\n"
    "Content-Type: text/plain\r\n"
@@ -945,6 +959,11 @@ static int crunch_response_triggered(struct client_state *csp, const struct crun
 #ifdef FEATURE_STATISTICS
             if (c->flags & CF_COUNT_AS_REJECT)
             {
+#ifdef MUTEX_LOCKS_AVAILABLE
+               privoxy_mutex_lock(&block_statistics_mutex);
+               number_of_requests_blocked++;
+               privoxy_mutex_unlock(&block_statistics_mutex);
+#endif
                csp->flags |= CSP_FLAG_REJECTED;
             }
 #endif /* def FEATURE_STATISTICS */
@@ -1813,6 +1832,19 @@ static jb_err receive_client_request(struct client_state *csp)
       free_http_request(http);
       return JB_ERR_PARSE;
    }
+   if (http->ssl && strcmpic(http->gpc, "CONNECT"))
+   {
+      write_socket_delayed(csp->cfd, CHEADER, strlen(CHEADER),
+         get_write_delay(csp));
+      /* XXX: Use correct size */
+      log_error(LOG_LEVEL_CLF, "%s - - [%T] \"Invalid request\" 400 0",
+         csp->ip_addr_str);
+      log_error(LOG_LEVEL_ERROR, "Client %s tried to send a https "
+         "URL without sending a CONNECT request first",
+         csp->ip_addr_str);
+      free_http_request(http);
+      return JB_ERR_PARSE;
+   }
 
    /* grab the rest of the client's headers */
    init_list(headers);
@@ -1835,7 +1867,8 @@ static jb_err receive_client_request(struct client_state *csp)
          if (!data_is_available(csp->cfd, csp->config->socket_timeout))
          {
             log_error(LOG_LEVEL_ERROR,
-               "Stopped grabbing the client headers.");
+               "Client headers did not arrive in time. Timeout: %d",
+               csp->config->socket_timeout);
             destroy_list(headers);
             return JB_ERR_PARSE;
          }
@@ -2140,7 +2173,7 @@ static int update_client_headers(struct client_state *csp, size_t new_content_le
 
 /*********************************************************************
  *
- * Function    : can_filter_request_body
+ * Function    : can_buffer_request_body
  *
  * Description : Checks if the current request body can be stored in
  *               the client_iob without hitting buffer limit.
@@ -2152,14 +2185,14 @@ static int update_client_headers(struct client_state *csp, size_t new_content_le
  *               FALSE otherwise.
  *
  *********************************************************************/
-static int can_filter_request_body(const struct client_state *csp)
+static int can_buffer_request_body(const struct client_state *csp)
 {
    if (!can_add_to_iob(csp->client_iob, csp->config->buffer_limit,
                        csp->expected_client_content_length))
    {
       log_error(LOG_LEVEL_INFO,
          "Not filtering request body from %s: buffer limit %lu will be exceeded "
-         "(content length %lluu)", csp->ip_addr_str, csp->config->buffer_limit,
+         "(content length %llu)", csp->ip_addr_str, csp->config->buffer_limit,
          csp->expected_client_content_length);
       return FALSE;
    }
@@ -2177,39 +2210,13 @@ static int can_filter_request_body(const struct client_state *csp)
  * Parameters  :
  *          1  :  csp = Current client state (buffers, headers, etc...)
  *
- * Returns     :  0 on success, anything else is an error.
+ * Returns     :  0 on success, 1 on error, 2 if the request got crunched.
  *
  *********************************************************************/
 static int send_http_request(struct client_state *csp)
 {
    char *hdr;
    int write_failure;
-   const char *to_send;
-   size_t to_send_len;
-   int filter_client_body = csp->expected_client_content_length != 0 &&
-      client_body_filters_enabled(csp->action) && can_filter_request_body(csp);
-
-   if (filter_client_body)
-   {
-      if (read_http_request_body(csp))
-      {
-         return 1;
-      }
-      to_send_len = csp->expected_client_content_length;
-      to_send = execute_client_body_filters(csp, &to_send_len);
-      if (to_send == NULL)
-      {
-         /* just flush client_iob */
-         filter_client_body = FALSE;
-      }
-      else if (to_send_len != csp->expected_client_content_length &&
-         update_client_headers(csp, to_send_len))
-      {
-         log_error(LOG_LEVEL_HEADER, "Error updating client headers");
-         return 1;
-      }
-      csp->expected_client_content_length = 0;
-   }
 
    hdr = list_to_text(csp->headers);
    if (hdr == NULL)
@@ -2233,18 +2240,7 @@ static int send_http_request(struct client_state *csp)
       return 1;
    }
 
-   if (filter_client_body)
-   {
-      write_failure = 0 != write_socket(csp->server_connection.sfd, to_send, to_send_len);
-      freez(to_send);
-      if (write_failure)
-      {
-         log_error(LOG_LEVEL_CONNECT, "Failed sending filtered request body to: %s: %E",
-            csp->http->hostport);
-         return 1;
-      }
-   }
-
+   /* XXX: Filtered data is not sent if there's a pipelined request? */
    if (((csp->flags & CSP_FLAG_PIPELINED_REQUEST_WAITING) == 0)
       && (flush_iob(csp->server_connection.sfd, csp->client_iob, 0) < 0))
    {
@@ -2291,13 +2287,15 @@ static int read_https_request_body(struct client_state *csp)
       size_t max_bytes_to_read = to_read < sizeof(buf) ? to_read : sizeof(buf);
 
       log_error(LOG_LEVEL_CONNECT,
-         "Waiting for up to %lu bytes of request body from the client.",
+         "Buffering encrypted client body. Prepared to read up to %lu bytes.",
          max_bytes_to_read);
       len = ssl_recv_data(&(csp->ssl_client_attr), buf,
          (unsigned)max_bytes_to_read);
       if (len <= 0)
       {
-         log_error(LOG_LEVEL_CONNECT, "Failed receiving request body from %s", csp->ip_addr_str);
+         log_error(LOG_LEVEL_CONNECT,
+            "Did not receive the whole encrypted request body from %s",
+            csp->ip_addr_str);
          return 1;
       }
       if (add_to_iob(csp->client_iob, csp->config->buffer_limit, (char *)buf, len))
@@ -2310,13 +2308,13 @@ static int read_https_request_body(struct client_state *csp)
    if (to_read != 0)
    {
       log_error(LOG_LEVEL_CONNECT,
-         "Not enough request body has been read: expected %lu more bytes",
+         "Not enough encrypted request body has been read: expected %lu more bytes",
          to_read);
       return 1;
    }
 
    log_error(LOG_LEVEL_CONNECT,
-      "The last %llu bytes of the request body have been read",
+      "The last %llu bytes of the encrypted request body have been read",
       csp->expected_client_content_length);
    return 0;
 }
@@ -2339,8 +2337,9 @@ static int receive_and_send_encrypted_post_data(struct client_state *csp)
 {
    int content_length_known = csp->expected_client_content_length != 0;
 
-   while (is_ssl_pending(&(csp->ssl_client_attr))
-      || (content_length_known && csp->expected_client_content_length != 0))
+   while ((content_length_known && csp->expected_client_content_length != 0) ||
+      (is_ssl_pending(&(csp->ssl_client_attr)) ||
+            data_is_available(csp->cfd, csp->config->socket_timeout)))
    {
       unsigned char buf[BUFFER_SIZE];
       int len;
@@ -2351,7 +2350,7 @@ static int receive_and_send_encrypted_post_data(struct client_state *csp)
          max_bytes_to_read = (int)csp->expected_client_content_length;
       }
       log_error(LOG_LEVEL_CONNECT,
-         "Waiting for up to %d bytes of request body from the client.",
+         "Prepared to read up to %d bytes of encrypted request body from the client.",
          max_bytes_to_read);
       len = ssl_recv_data(&(csp->ssl_client_attr), buf,
          (unsigned)max_bytes_to_read);
@@ -2410,32 +2409,6 @@ static int send_https_request(struct client_state *csp)
    char *hdr;
    int ret;
    long flushed = 0;
-   const char *to_send;
-   size_t to_send_len;
-   int filter_client_body = csp->expected_client_content_length != 0 &&
-      client_body_filters_enabled(csp->action) && can_filter_request_body(csp);
-
-   if (filter_client_body)
-   {
-      if (read_https_request_body(csp))
-      {
-         return 1;
-      }
-      to_send_len = csp->expected_client_content_length;
-      to_send = execute_client_body_filters(csp, &to_send_len);
-      if (to_send == NULL)
-      {
-         /* just flush client_iob */
-         filter_client_body = FALSE;
-      }
-      else if (to_send_len != csp->expected_client_content_length &&
-         update_client_headers(csp, to_send_len))
-      {
-         log_error(LOG_LEVEL_HEADER, "Error updating client headers");
-         return 1;
-      }
-      csp->expected_client_content_length = 0;
-   }
 
    hdr = list_to_text(csp->https_headers);
    if (hdr == NULL)
@@ -2462,18 +2435,7 @@ static int send_https_request(struct client_state *csp)
       return 1;
    }
 
-   if (filter_client_body)
-   {
-      ret = ssl_send_data(&(csp->ssl_server_attr), (const unsigned char *)to_send, to_send_len);
-      freez(to_send);
-      if (ret < 0)
-      {
-         log_error(LOG_LEVEL_CONNECT, "Failed sending filtered request body to: %s",
-            csp->http->hostport);
-         return 1;
-      }
-   }
-
+   /* XXX: Client body isn't sent if there's pipelined data? */
    if (((csp->flags & CSP_FLAG_PIPELINED_REQUEST_WAITING) == 0)
       && ((flushed = ssl_flush_socket(&(csp->ssl_server_attr),
             csp->client_iob)) < 0))
@@ -2801,6 +2763,8 @@ static jb_err process_encrypted_request_headers(struct client_state *csp)
          "Failed to get the encrypted request destination");
       ssl_send_data_delayed(&(csp->ssl_client_attr),
          (const unsigned char *)CHEADER, strlen(CHEADER), get_write_delay(csp));
+      destroy_list(headers);
+
       return JB_ERR_PARSE;
    }
 
@@ -2945,6 +2909,12 @@ static void continue_https_chat(struct client_state *csp)
       return;
    }
 
+#if defined(FEATURE_STATISTICS) && defined(MUTEX_LOCKS_AVAILABLE)
+   privoxy_mutex_lock(&block_statistics_mutex);
+   number_of_requests_received++;
+   privoxy_mutex_unlock(&block_statistics_mutex);
+#endif
+
    csp->requests_received_total++;
 
    /*
@@ -2968,6 +2938,46 @@ static void continue_https_chat(struct client_state *csp)
    }
    assert(csp->server_connection.sfd != JB_INVALID_SOCKET);
 
+   if (csp->expected_client_content_length != 0 &&
+      (client_body_filters_enabled(csp->action) ||
+       client_body_taggers_enabled(csp->action)) &&
+      can_buffer_request_body(csp))
+   {
+      int content_modified;
+
+      if (read_https_request_body(csp))
+      {
+         /* XXX: handle */
+         return;
+      }
+      if (client_body_taggers_enabled(csp->action))
+      {
+         execute_client_body_taggers(csp, csp->expected_client_content_length);
+         if (crunch_response_triggered(csp, crunchers_all))
+         {
+            /*
+             * Yes. The client got the crunch response and we're done here.
+             */
+            return;
+         }
+      }
+      if (client_body_filters_enabled(csp->action))
+      {
+         size_t modified_content_length = csp->expected_client_content_length;
+         content_modified = execute_client_body_filters(csp,
+            &modified_content_length);
+         if ((content_modified == 1) &&
+            (modified_content_length != csp->expected_client_content_length) &&
+            update_client_headers(csp, modified_content_length))
+         {
+            /* XXX: Send error response */
+            log_error(LOG_LEVEL_HEADER, "Error updating client headers");
+            return;
+         }
+      }
+      csp->expected_client_content_length = 0;
+   }
+
    fwd = forward_url(csp, csp->http);
    if (!connection_destination_matches(&csp->server_connection, csp->http, fwd))
    {
@@ -3101,6 +3111,16 @@ static void handle_established_connection(struct client_state *csp)
           */
          goto server_wants_to_talk;
       }
+      if (watch_client_socket && client_use_ssl(csp) &&
+         is_ssl_pending(&(csp->ssl_client_attr)))
+      {
+         /*
+          * The TLS library may also consume all of the remaining data
+          * from the client when we're shuffling the data from an
+          * unbuffered request body to the server.
+          */
+         goto client_wants_to_talk;
+      }
 #endif
 #ifndef HAVE_POLL
       FD_ZERO(&rfds);
@@ -3242,7 +3262,13 @@ static void handle_established_connection(struct client_state *csp)
       if (FD_ISSET(csp->cfd, &rfds))
 #endif /* def HAVE_POLL*/
       {
-         int max_bytes_to_read = (int)csp->receive_buffer_size;
+         int max_bytes_to_read;
+
+#ifdef FEATURE_HTTPS_INSPECTION
+         client_wants_to_talk:
+#endif
+
+         max_bytes_to_read = (int)csp->receive_buffer_size;
 
 #ifdef FEATURE_CONNECTION_KEEP_ALIVE
          if ((csp->flags & CSP_FLAG_CLIENT_REQUEST_COMPLETELY_READ))
@@ -4170,6 +4196,13 @@ static void chat(struct client_state *csp)
    {
       return;
    }
+
+#if defined(FEATURE_STATISTICS) && defined(MUTEX_LOCKS_AVAILABLE)
+   privoxy_mutex_lock(&block_statistics_mutex);
+   number_of_requests_received++;
+   privoxy_mutex_unlock(&block_statistics_mutex);
+#endif
+
    if (parse_client_request(csp) != JB_ERR_OK)
    {
       return;
@@ -4341,6 +4374,72 @@ static void chat(struct client_state *csp)
    }
 #endif
 
+   /* If we need to apply client body filters or taggers, buffer the whole request now. */
+   if (csp->expected_client_content_length != 0 &&
+      (client_body_filters_enabled(csp->action) ||
+         client_body_taggers_enabled(csp->action)) &&
+      can_buffer_request_body(csp))
+   {
+      int content_modified;
+      size_t modified_content_length;
+
+#ifdef FEATURE_HTTPS_INSPECTION
+      if (client_use_ssl(csp) && read_https_request_body(csp))
+      {
+         log_error(LOG_LEVEL_ERROR, "Failed to buffer the encrypted "
+            "request body to apply filters or taggers.");
+         log_error(LOG_LEVEL_CLF,
+            "%s - - [%T] \"%s\" 400 0", csp->ip_addr_str, csp->http->cmd);
+
+         ssl_send_data_delayed(&(csp->ssl_client_attr),
+            (const unsigned char *)CLIENT_BODY_BUFFER_ERROR_RESPONSE,
+            strlen(CLIENT_BODY_BUFFER_ERROR_RESPONSE),
+            get_write_delay(csp));
+
+         return;
+      }
+      else
+#endif
+      if (read_http_request_body(csp))
+      {
+         log_error(LOG_LEVEL_ERROR,
+            "Failed to buffer the request body to apply filters or taggers,");
+         log_error(LOG_LEVEL_CLF,
+            "%s - - [%T] \"%s\" 400 0", csp->ip_addr_str, csp->http->cmd);
+
+         write_socket_delayed(csp->cfd, CLIENT_BODY_BUFFER_ERROR_RESPONSE,
+            strlen(CLIENT_BODY_BUFFER_ERROR_RESPONSE), get_write_delay(csp));
+
+         return;
+      }
+      if (client_body_taggers_enabled(csp->action))
+      {
+         execute_client_body_taggers(csp, csp->expected_client_content_length);
+         if (crunch_response_triggered(csp, crunchers_all))
+         {
+            /*
+             * Yes. The client got the crunch response and we're done here.
+             */
+            return;
+         }
+      }
+      if (client_body_filters_enabled(csp->action))
+      {
+         modified_content_length = csp->expected_client_content_length;
+         content_modified = execute_client_body_filters(csp,
+            &modified_content_length);
+         if ((content_modified == 1) &&
+            (modified_content_length != csp->expected_client_content_length) &&
+            update_client_headers(csp, modified_content_length))
+         {
+            /* XXX: Send error response */
+            log_error(LOG_LEVEL_HEADER, "Error updating client headers");
+            return;
+         }
+      }
+      csp->expected_client_content_length = 0;
+   }
+
    log_applied_actions(csp->action);
 
    /* decide how to route the HTTP request */
@@ -4581,7 +4680,13 @@ static void chat(struct client_state *csp)
 #endif
            ))
    {
-      if (send_http_request(csp))
+      int status = send_http_request(csp);
+      if (status == 2)
+      {
+         /* The request got crunched, a response has been delivered. */
+         return;
+      }
+      if (status != 0)
       {
          rsp = error_response(csp, "connect-failed");
          if (rsp)
@@ -5244,9 +5349,12 @@ static void initialize_mutexes(void)
 #ifdef FEATURE_CLIENT_TAGS
    privoxy_mutex_init(&client_tags_mutex);
 #endif
+#ifdef FEATURE_STATISTICS
+   privoxy_mutex_init(&block_statistics_mutex);
+#endif
 #ifdef FEATURE_EXTENDED_STATISTICS
    privoxy_mutex_init(&filter_statistics_mutex);
-   privoxy_mutex_init(&block_statistics_mutex);
+   privoxy_mutex_init(&block_reason_statistics_mutex);
 #endif
 
    /*
@@ -6276,6 +6384,8 @@ static void listen_loop(void)
 
    log_error(LOG_LEVEL_INFO, "Graceful termination requested.");
 
+   close_ports_helper(bfds);
+
    unload_current_config_file();
    unload_current_actions_file();
    unload_current_re_filterfile();