Add a client-body-tagger action
author: Fabian Keil <fk@fabiankeil.de>
Tue, 23 Mar 2021 14:45:54 +0000 (15:45 +0100)
committer: Fabian Keil <fk@fabiankeil.de>
Tue, 26 Apr 2022 14:51:37 +0000 (16:51 +0200)
... which creates tags based on the content of the request body.

Sponsored by: Robert Klemme

actionlist.h
actions.c
filters.c
filters.h
jcc.c
loaders.c
project.h

index fc7f514..d95b822 100644 (file)
@@ -57,6 +57,7 @@ DEFINE_ACTION_STRING     ("change-x-forwarded-for",     ACTION_CHANGE_X_FORWARDE
 DEFINE_CGI_PARAM_RADIO   ("change-x-forwarded-for",     ACTION_CHANGE_X_FORWARDED_FOR,  ACTION_STRING_CHANGE_X_FORWARDED_FOR, "block", 0)
 DEFINE_CGI_PARAM_RADIO   ("change-x-forwarded-for",     ACTION_CHANGE_X_FORWARDED_FOR,  ACTION_STRING_CHANGE_X_FORWARDED_FOR, "add", 1)
 DEFINE_ACTION_MULTI      ("client-body-filter",         ACTION_MULTI_CLIENT_BODY_FILTER)
 DEFINE_CGI_PARAM_RADIO   ("change-x-forwarded-for",     ACTION_CHANGE_X_FORWARDED_FOR,  ACTION_STRING_CHANGE_X_FORWARDED_FOR, "block", 0)
 DEFINE_CGI_PARAM_RADIO   ("change-x-forwarded-for",     ACTION_CHANGE_X_FORWARDED_FOR,  ACTION_STRING_CHANGE_X_FORWARDED_FOR, "add", 1)
 DEFINE_ACTION_MULTI      ("client-body-filter",         ACTION_MULTI_CLIENT_BODY_FILTER)
+DEFINE_ACTION_MULTI      ("client-body-tagger",         ACTION_MULTI_CLIENT_BODY_TAGGER)
 DEFINE_ACTION_MULTI      ("client-header-filter",       ACTION_MULTI_CLIENT_HEADER_FILTER)
 DEFINE_ACTION_MULTI      ("client-header-tagger",       ACTION_MULTI_CLIENT_HEADER_TAGGER)
 DEFINE_ACTION_STRING     ("content-type-overwrite",     ACTION_CONTENT_TYPE_OVERWRITE, ACTION_STRING_CONTENT_TYPE)
 DEFINE_ACTION_MULTI      ("client-header-filter",       ACTION_MULTI_CLIENT_HEADER_FILTER)
 DEFINE_ACTION_MULTI      ("client-header-tagger",       ACTION_MULTI_CLIENT_HEADER_TAGGER)
 DEFINE_ACTION_STRING     ("content-type-overwrite",     ACTION_CONTENT_TYPE_OVERWRITE, ACTION_STRING_CONTENT_TYPE)
index a80ee64..7fbcd3a 100644 (file)
--- a/actions.c
+++ b/actions.c
@@ -1115,6 +1115,8 @@ static const char *filter_type_to_string(enum filter_type filter_type)
       return "suppress tag filter";
    case FT_CLIENT_BODY_FILTER:
       return "client body filter";
       return "suppress tag filter";
    case FT_CLIENT_BODY_FILTER:
       return "client body filter";
+   case FT_CLIENT_BODY_TAGGER:
+      return "client body tagger";
    case FT_ADD_HEADER:
       return "add-header action";
 #ifdef FEATURE_EXTERNAL_FILTERS
    case FT_ADD_HEADER:
       return "add-header action";
 #ifdef FEATURE_EXTERNAL_FILTERS
index 2dd6503..5533fcd 100644 (file)
--- a/filters.c
+++ b/filters.c
@@ -2448,6 +2448,177 @@ int execute_client_body_filters(struct client_state *csp, size_t *content_length
 }
 
 
 }
 
 
+/*********************************************************************
+ *
+ * Function    :  execute_client_body_taggers
+ *
+ * Description :  Executes client body taggers for the request that is
+ *                buffered in the client_iob.
+ *                XXX: Lots of code shared with header_tagger
+ *
+ * Parameters  :
+ *          1  :  csp = Current client state (buffers, headers, etc...)
+ *          2  :  content_length = Number of bytes at client_iob->cur to scan.
+ *
+ * Returns     :  JB_ERR_OK in all cases; problems are only logged.
+ *
+ *********************************************************************/
+jb_err execute_client_body_taggers(struct client_state *csp, size_t content_length)
+{
+   enum filter_type wanted_filter_type = FT_CLIENT_BODY_TAGGER;
+   int multi_action_index = ACTION_MULTI_CLIENT_BODY_TAGGER;
+   pcrs_job *job;
+
+   struct re_filterfile_spec *b;
+   struct list_entry *tag_name;
+
+   assert(client_body_taggers_enabled(csp->action));
+
+   if (content_length == 0)
+   {
+      /*
+       * No content, no tagging necessary.
+       */
+      return JB_ERR_OK;
+   }
+
+   log_error(LOG_LEVEL_INFO, "Got to execute tagger on %N",
+      content_length, csp->client_iob->cur);
+
+   if (list_is_empty(csp->action->multi[multi_action_index])
+      || filters_available(csp) == FALSE)
+   {
+      /* Return early if no taggers apply or if none are available. */
+      return JB_ERR_OK;
+   }
+
+   /* Execute all applying taggers, each one starting from the unmodified body. */
+   for (tag_name = csp->action->multi[multi_action_index]->first;
+        NULL != tag_name; tag_name = tag_name->next)
+   {
+      char *modified_tag = NULL;
+      char *tag = csp->client_iob->cur;
+      size_t size = content_length;
+      pcrs_job *joblist;
+
+      b = get_filter(csp, tag_name->str, wanted_filter_type);
+      if (b == NULL)
+      {
+         continue;
+      }
+
+      joblist = b->joblist;
+
+      if (b->dynamic) joblist = compile_dynamic_pcrs_job_list(csp, b);
+
+      if (NULL == joblist)
+      {
+         log_error(LOG_LEVEL_TAGGING,
+            "Tagger %s has empty joblist. Nothing to do.", b->name);
+         continue;
+      }
+
+      /* Run each job; the output of a matching job is the next job's input. */
+      for (job = joblist; NULL != job; job = job->next)
+      {
+         const int hits = pcrs_execute(job, tag, size, &modified_tag, &size);
+
+         if (0 < hits)
+         {
+            /* Success, continue with the modified version. Never free the iob itself. */
+            if (tag != csp->client_iob->cur)
+            {
+               freez(tag);
+            }
+            tag = modified_tag;
+         }
+         else
+         {
+            /* Job didn't match or failed, discard its output. */
+            if (0 > hits)
+            {
+               /* Regex failure, log it but continue anyway. */
+               log_error(LOG_LEVEL_ERROR,
+                  "Problems with tagger \'%s\': %s",
+                  b->name, pcrs_strerror(hits));
+            }
+            freez(modified_tag);
+         }
+      }
+
+      if (b->dynamic) pcrs_free_joblist(joblist);
+
+      /* tag only differs from the buffer start if at least one job matched. */
+      if (tag != csp->client_iob->cur)
+      {
+         if (0 == size)
+         {
+            /*
+             * There is no technical limitation which makes
+             * it impossible to use empty tags, but I assume
+             * no one would do it intentionally.
+             */
+            freez(tag);
+            log_error(LOG_LEVEL_TAGGING,
+               "Tagger \'%s\' created an empty tag. Ignored.", b->name);
+            continue;
+         }
+
+         if (list_contains_item(csp->action->multi[ACTION_MULTI_SUPPRESS_TAG], tag))
+         {
+            log_error(LOG_LEVEL_TAGGING,
+               "Tagger \'%s\' didn't add tag \'%s\': suppressed",
+               b->name, tag);
+            freez(tag);
+            continue;
+         }
+
+         if (!list_contains_item(csp->tags, tag))
+         {
+            if (JB_ERR_OK != enlist(csp->tags, tag))
+            {
+               log_error(LOG_LEVEL_ERROR,
+                  "Insufficient memory to add tag \'%s\', "
+                  "based on tagger \'%s\'",
+                  tag, b->name);
+            }
+            else
+            {
+               char *action_message;
+               /*
+                * update the action bits right away, to make
+                * tagging based on tags set by earlier taggers
+                * of the same kind possible.
+                */
+               if (update_action_bits_for_tag(csp, tag))
+               {
+                  action_message = "Action bits updated accordingly.";
+               }
+               else
+               {
+                  action_message = "No action bits update necessary.";
+               }
+
+               log_error(LOG_LEVEL_TAGGING,
+                  "Tagger \'%s\' added tag \'%s\'. %s",
+                  b->name, tag, action_message);
+            }
+         }
+         else
+         {
+            /* XXX: Is this log-worthy? */
+            log_error(LOG_LEVEL_TAGGING,
+               "Tagger \'%s\' didn't add tag \'%s\'. Tag already present",
+               b->name, tag);
+         }
+         freez(tag);
+      }
+   }
+
+   return JB_ERR_OK;
+}
+
+
 /*********************************************************************
  *
  * Function    :  get_url_actions
 /*********************************************************************
  *
  * Function    :  get_url_actions
@@ -2890,6 +3061,24 @@ int client_body_filters_enabled(const struct current_action_spec *action)
 }
 
 
 }
 
 
+/*********************************************************************
+ *
+ * Function    :  client_body_taggers_enabled
+ *
+ * Description :  Checks whether there are any client body taggers
+ *                enabled for the current request.
+ *
+ * Parameters  :
+ *          1  :  action = Action spec to check.
+ *
+ * Returns     :  Non-zero if the client-body-tagger list is not empty, else zero.
+ *
+ *********************************************************************/
+int client_body_taggers_enabled(const struct current_action_spec *action)
+{
+   return !list_is_empty(action->multi[ACTION_MULTI_CLIENT_BODY_TAGGER]);
+}
+
 /*********************************************************************
  *
  * Function    :  filters_available
 /*********************************************************************
  *
  * Function    :  filters_available
index 829a167..11b3e85 100644 (file)
--- a/filters.h
+++ b/filters.h
@@ -85,6 +85,7 @@ extern const struct forward_spec *forward_url(struct client_state *csp,
  */
 extern char *execute_content_filters(struct client_state *csp);
 extern int execute_client_body_filters(struct client_state *csp, size_t *content_length);
  */
 extern char *execute_content_filters(struct client_state *csp);
 extern int execute_client_body_filters(struct client_state *csp, size_t *content_length);
+extern jb_err execute_client_body_taggers(struct client_state *csp, size_t content_length);
 extern char *execute_single_pcrs_command(char *subject, const char *pcrs_command, int *hits);
 extern char *rewrite_url(char *old_url, const char *pcrs_command);
 
 extern char *execute_single_pcrs_command(char *subject, const char *pcrs_command, int *hits);
 extern char *rewrite_url(char *old_url, const char *pcrs_command);
 
@@ -93,6 +94,7 @@ extern pcrs_job *compile_dynamic_pcrs_job_list(const struct client_state *csp, c
 extern int content_requires_filtering(struct client_state *csp);
 extern int content_filters_enabled(const struct current_action_spec *action);
 extern int client_body_filters_enabled(const struct current_action_spec *action);
 extern int content_requires_filtering(struct client_state *csp);
 extern int content_filters_enabled(const struct current_action_spec *action);
 extern int client_body_filters_enabled(const struct current_action_spec *action);
+extern int client_body_taggers_enabled(const struct current_action_spec *action);
 extern int filters_available(const struct client_state *csp);
 
 /*
 extern int filters_available(const struct client_state *csp);
 
 /*
diff --git a/jcc.c b/jcc.c
index 81222a4..19b2f49 100644 (file)
--- a/jcc.c
+++ b/jcc.c
@@ -2173,7 +2173,7 @@ static int update_client_headers(struct client_state *csp, size_t new_content_le
 
 /*********************************************************************
  *
 
 /*********************************************************************
  *
- * Function    : can_filter_request_body
+ * Function    : can_buffer_request_body
  *
  * Description : Checks if the current request body can be stored in
  *               the client_iob without hitting buffer limit.
  *
  * Description : Checks if the current request body can be stored in
  *               the client_iob without hitting buffer limit.
@@ -2185,7 +2185,7 @@ static int update_client_headers(struct client_state *csp, size_t new_content_le
  *               FALSE otherwise.
  *
  *********************************************************************/
  *               FALSE otherwise.
  *
  *********************************************************************/
-static int can_filter_request_body(const struct client_state *csp)
+static int can_buffer_request_body(const struct client_state *csp)
 {
    if (!can_add_to_iob(csp->client_iob, csp->config->buffer_limit,
                        csp->expected_client_content_length))
 {
    if (!can_add_to_iob(csp->client_iob, csp->config->buffer_limit,
                        csp->expected_client_content_length))
@@ -2210,7 +2210,7 @@ static int can_filter_request_body(const struct client_state *csp)
  * Parameters  :
  *          1  :  csp = Current client state (buffers, headers, etc...)
  *
  * Parameters  :
  *          1  :  csp = Current client state (buffers, headers, etc...)
  *
- * Returns     :  0 on success, anything else is an error.
+ * Returns     :  0 on success, 1 on error, 2 if the request got crunched.
  *
  *********************************************************************/
 static int send_http_request(struct client_state *csp)
  *
  *********************************************************************/
 static int send_http_request(struct client_state *csp)
@@ -2939,27 +2939,41 @@ static void continue_https_chat(struct client_state *csp)
    assert(csp->server_connection.sfd != JB_INVALID_SOCKET);
 
    if (csp->expected_client_content_length != 0 &&
    assert(csp->server_connection.sfd != JB_INVALID_SOCKET);
 
    if (csp->expected_client_content_length != 0 &&
-      client_body_filters_enabled(csp->action) &&
-      can_filter_request_body(csp))
+      (client_body_filters_enabled(csp->action) ||
+       client_body_taggers_enabled(csp->action)) &&
+      can_buffer_request_body(csp))
    {
       int content_modified;
    {
       int content_modified;
-      size_t buffered_content_length;
 
       if (read_https_request_body(csp))
       {
          /* XXX: handle */
          return;
       }
 
       if (read_https_request_body(csp))
       {
          /* XXX: handle */
          return;
       }
-      buffered_content_length = csp->expected_client_content_length;
-      content_modified  = execute_client_body_filters(csp, &buffered_content_length);
-      if ((content_modified == 1) &&
-         (buffered_content_length != csp->expected_client_content_length) &&
-         update_client_headers(csp, buffered_content_length))
+      if (client_body_taggers_enabled(csp->action))
       {
       {
-         log_error(LOG_LEVEL_HEADER, "Failed to update client headers "
-            "after filtering the encrypted client body");
-         /* XXX: handle */
-         return;
+         execute_client_body_taggers(csp, csp->expected_client_content_length);
+         if (crunch_response_triggered(csp, crunchers_all))
+         {
+            /*
+             * Yes. The client got the crunch response and we're done here.
+             */
+            return;
+         }
+      }
+      if (client_body_filters_enabled(csp->action))
+      {
+         size_t modified_content_length = csp->expected_client_content_length;
+         content_modified = execute_client_body_filters(csp,
+            &modified_content_length);
+         if ((content_modified == 1) &&
+            (modified_content_length != csp->expected_client_content_length) &&
+            update_client_headers(csp, modified_content_length))
+         {
+            /* XXX: Send error response */
+            log_error(LOG_LEVEL_HEADER, "Error updating client headers");
+            return;
+         }
       }
       csp->expected_client_content_length = 0;
    }
       }
       csp->expected_client_content_length = 0;
    }
@@ -4362,7 +4376,9 @@ static void chat(struct client_state *csp)
 
    /* If we need to apply client body filters, buffer the whole request now. */
    if (csp->expected_client_content_length != 0 &&
 
    /* If we need to apply client body filters, buffer the whole request now. */
    if (csp->expected_client_content_length != 0 &&
-      client_body_filters_enabled(csp->action) && can_filter_request_body(csp))
+      (client_body_filters_enabled(csp->action) ||
+         client_body_taggers_enabled(csp->action)) &&
+      can_buffer_request_body(csp))
    {
       int content_modified;
       size_t modified_content_length;
    {
       int content_modified;
       size_t modified_content_length;
@@ -4370,8 +4386,8 @@ static void chat(struct client_state *csp)
 #ifdef FEATURE_HTTPS_INSPECTION
       if (client_use_ssl(csp) && read_https_request_body(csp))
       {
 #ifdef FEATURE_HTTPS_INSPECTION
       if (client_use_ssl(csp) && read_https_request_body(csp))
       {
-         log_error(LOG_LEVEL_ERROR,
-            "Failed to buffer the encrypted request body to apply filters");
+         log_error(LOG_LEVEL_ERROR, "Failed to buffer the encrypted "
+            "request body to apply filters or taggers.");
          log_error(LOG_LEVEL_CLF,
             "%s - - [%T] \"%s\" 400 0", csp->ip_addr_str, csp->http->cmd);
 
          log_error(LOG_LEVEL_CLF,
             "%s - - [%T] \"%s\" 400 0", csp->ip_addr_str, csp->http->cmd);
 
@@ -4387,7 +4403,7 @@ static void chat(struct client_state *csp)
       if (read_http_request_body(csp))
       {
          log_error(LOG_LEVEL_ERROR,
       if (read_http_request_body(csp))
       {
          log_error(LOG_LEVEL_ERROR,
-            "Failed to buffer the request body to apply filters");
+            "Failed to buffer the request body to apply filters or taggers,");
          log_error(LOG_LEVEL_CLF,
             "%s - - [%T] \"%s\" 400 0", csp->ip_addr_str, csp->http->cmd);
 
          log_error(LOG_LEVEL_CLF,
             "%s - - [%T] \"%s\" 400 0", csp->ip_addr_str, csp->http->cmd);
 
@@ -4396,16 +4412,30 @@ static void chat(struct client_state *csp)
 
          return;
       }
 
          return;
       }
-      modified_content_length = csp->expected_client_content_length;
-      content_modified = execute_client_body_filters(csp,
-         &modified_content_length);
-      if ((content_modified == 1) &&
-         (modified_content_length != csp->expected_client_content_length) &&
-         update_client_headers(csp, modified_content_length))
+      if (client_body_taggers_enabled(csp->action))
       {
       {
-         /* XXX: Send error response */
-         log_error(LOG_LEVEL_HEADER, "Error updating client headers");
-         return;
+         execute_client_body_taggers(csp, csp->expected_client_content_length);
+         if (crunch_response_triggered(csp, crunchers_all))
+         {
+            /*
+             * Yes. The client got the crunch response and we're done here.
+             */
+            return;
+         }
+      }
+      if (client_body_filters_enabled(csp->action))
+      {
+         modified_content_length = csp->expected_client_content_length;
+         content_modified = execute_client_body_filters(csp,
+            &modified_content_length);
+         if ((content_modified == 1) &&
+            (modified_content_length != csp->expected_client_content_length) &&
+            update_client_headers(csp, modified_content_length))
+         {
+            /* XXX: Send error response */
+            log_error(LOG_LEVEL_HEADER, "Error updating client headers");
+            return;
+         }
       }
       csp->expected_client_content_length = 0;
    }
       }
       csp->expected_client_content_length = 0;
    }
@@ -4650,7 +4680,13 @@ static void chat(struct client_state *csp)
 #endif
            ))
    {
 #endif
            ))
    {
-      if (send_http_request(csp))
+      int status = send_http_request(csp);
+      if (status == 2)
+      {
+         /* The request got crunched, a response has been delivered. */
+         return;
+      }
+      if (status != 0)
       {
          rsp = error_response(csp, "connect-failed");
          if (rsp)
       {
          rsp = error_response(csp, "connect-failed");
          if (rsp)
index 5f21fd0..a9820df 100644 (file)
--- a/loaders.c
+++ b/loaders.c
@@ -1168,6 +1168,10 @@ int load_one_re_filterfile(struct client_state *csp, int fileid)
       {
          new_filter = FT_CLIENT_BODY_FILTER;
       }
       {
          new_filter = FT_CLIENT_BODY_FILTER;
       }
+      else if (strncmp(buf, "CLIENT-BODY-TAGGER:", 19) == 0)
+      {
+         new_filter = FT_CLIENT_BODY_TAGGER;
+      }
 
       /*
        * If this is the head of a new filter block, make it a
 
       /*
        * If this is the head of a new filter block, make it a
@@ -1190,6 +1194,10 @@ int load_one_re_filterfile(struct client_state *csp, int fileid)
          {
             new_bl->name = chomp(buf + 19);
          }
          {
             new_bl->name = chomp(buf + 19);
          }
+         else if (new_filter == FT_CLIENT_BODY_TAGGER)
+         {
+            new_bl->name = chomp(buf + 19);
+         }
          else
          {
             new_bl->name = chomp(buf + 21);
          else
          {
             new_bl->name = chomp(buf + 21);
index 527a868..e8bb678 100644 (file)
--- a/project.h
+++ b/project.h
@@ -642,8 +642,10 @@ struct iob
 #define ACTION_MULTI_SUPPRESS_TAG            7
 /** Index into current_action_spec::multi[] for client body filters to apply. */
 #define ACTION_MULTI_CLIENT_BODY_FILTER      8
 #define ACTION_MULTI_SUPPRESS_TAG            7
 /** Index into current_action_spec::multi[] for client body filters to apply. */
 #define ACTION_MULTI_CLIENT_BODY_FILTER      8
+/** Index into current_action_spec::multi[] for client body taggers to apply. */
+#define ACTION_MULTI_CLIENT_BODY_TAGGER      9
 /** Number of multi-string actions. */
 /** Number of multi-string actions. */
-#define ACTION_MULTI_COUNT                   9
+#define ACTION_MULTI_COUNT                  10
 
 
 /**
 
 
 /**
@@ -1296,9 +1298,10 @@ enum filter_type
    FT_SERVER_HEADER_TAGGER = 4,
    FT_SUPPRESS_TAG = 5,
    FT_CLIENT_BODY_FILTER = 6,
    FT_SERVER_HEADER_TAGGER = 4,
    FT_SUPPRESS_TAG = 5,
    FT_CLIENT_BODY_FILTER = 6,
-   FT_ADD_HEADER = 7,
+   FT_CLIENT_BODY_TAGGER = 7,
+   FT_ADD_HEADER = 8,
 #ifdef FEATURE_EXTERNAL_FILTERS
 #ifdef FEATURE_EXTERNAL_FILTERS
-   FT_EXTERNAL_CONTENT_FILTER = 8,
+   FT_EXTERNAL_CONTENT_FILTER = 9,
 #endif
    FT_INVALID_FILTER       = 42,
 };
 #endif
    FT_INVALID_FILTER       = 42,
 };