From: Fabian Keil Date: Tue, 23 Mar 2021 14:45:54 +0000 (+0100) Subject: Add a client-body-tagger action X-Git-Tag: v_3_0_34~123 X-Git-Url: http://www.privoxy.org/gitweb/%22https:/developer-manual/faq/static/user-manual/easr?a=commitdiff_plain;h=d128e6aa419ebf45411003e0e0276038e67d0b33;p=privoxy.git Add a client-body-tagger action ... which creates tags based on the content of the request body. Sponsored by: Robert Klemme --- diff --git a/actionlist.h b/actionlist.h index fc7f5142..d95b8226 100644 --- a/actionlist.h +++ b/actionlist.h @@ -57,6 +57,7 @@ DEFINE_ACTION_STRING ("change-x-forwarded-for", ACTION_CHANGE_X_FORWARDE DEFINE_CGI_PARAM_RADIO ("change-x-forwarded-for", ACTION_CHANGE_X_FORWARDED_FOR, ACTION_STRING_CHANGE_X_FORWARDED_FOR, "block", 0) DEFINE_CGI_PARAM_RADIO ("change-x-forwarded-for", ACTION_CHANGE_X_FORWARDED_FOR, ACTION_STRING_CHANGE_X_FORWARDED_FOR, "add", 1) DEFINE_ACTION_MULTI ("client-body-filter", ACTION_MULTI_CLIENT_BODY_FILTER) +DEFINE_ACTION_MULTI ("client-body-tagger", ACTION_MULTI_CLIENT_BODY_TAGGER) DEFINE_ACTION_MULTI ("client-header-filter", ACTION_MULTI_CLIENT_HEADER_FILTER) DEFINE_ACTION_MULTI ("client-header-tagger", ACTION_MULTI_CLIENT_HEADER_TAGGER) DEFINE_ACTION_STRING ("content-type-overwrite", ACTION_CONTENT_TYPE_OVERWRITE, ACTION_STRING_CONTENT_TYPE) diff --git a/actions.c b/actions.c index a80ee646..7fbcd3a3 100644 --- a/actions.c +++ b/actions.c @@ -1115,6 +1115,8 @@ static const char *filter_type_to_string(enum filter_type filter_type) return "suppress tag filter"; case FT_CLIENT_BODY_FILTER: return "client body filter"; + case FT_CLIENT_BODY_TAGGER: + return "client body tagger"; case FT_ADD_HEADER: return "add-header action"; #ifdef FEATURE_EXTERNAL_FILTERS diff --git a/filters.c b/filters.c index 2dd6503c..5533fcd1 100644 --- a/filters.c +++ b/filters.c @@ -2448,6 +2448,177 @@ int execute_client_body_filters(struct client_state *csp, size_t *content_length } 
+/********************************************************************* + * + * Function : execute_client_body_taggers + * + * Description : Executes client body taggers for the request that is + * buffered in the client_iob. + * XXX: Lots of code shared with header_tagger + * + * Parameters : + * 1 : csp = Current client state (buffers, headers, etc...) + * 2 : content_length = Length of the buffered request body in bytes. + * + * Returns : JB_ERR_OK, always. Tagger failures are only logged. + * + *********************************************************************/ +jb_err execute_client_body_taggers(struct client_state *csp, size_t content_length) +{ + enum filter_type wanted_filter_type = FT_CLIENT_BODY_TAGGER; + int multi_action_index = ACTION_MULTI_CLIENT_BODY_TAGGER; + pcrs_job *job; + + struct re_filterfile_spec *b; + struct list_entry *tag_name; + + assert(client_body_taggers_enabled(csp->action)); + + if (content_length == 0) + { + /* + * No content, no tagging necessary. + */ + return JB_ERR_OK; + } + + log_error(LOG_LEVEL_INFO, "Got to execute tagger on %N", + content_length, csp->client_iob->cur); + + if (list_is_empty(csp->action->multi[multi_action_index]) + || filters_available(csp) == FALSE) + { + /* Return early if no taggers apply or if none are available. */ + return JB_ERR_OK; + } + + /* Execute all applying taggers */ + for (tag_name = csp->action->multi[multi_action_index]->first; + NULL != tag_name; tag_name = tag_name->next) + { + char *modified_tag = NULL; + char *tag = csp->client_iob->cur; + size_t size = content_length; + pcrs_job *joblist; + + b = get_filter(csp, tag_name->str, wanted_filter_type); + if (b == NULL) + { + continue; + } + + joblist = b->joblist; + + if (b->dynamic) joblist = compile_dynamic_pcrs_job_list(csp, b); + + if (NULL == joblist) + { + log_error(LOG_LEVEL_TAGGING, + "Tagger %s has empty joblist. Nothing to do.", b->name); + continue; + } + + /* execute their pcrs_joblist on the body. 
*/ + for (job = joblist; NULL != job; job = job->next) + { + const int hits = pcrs_execute(job, tag, size, &modified_tag, &size); + + if (0 < hits) + { + /* Success, continue with the modified version. */ + if (tag != csp->client_iob->cur) + { + freez(tag); + } + tag = modified_tag; + } + else + { + /* Tagger doesn't match */ + if (0 > hits) + { + /* Regex failure, log it but continue anyway. */ + log_error(LOG_LEVEL_ERROR, + "Problems with tagger \'%s\': %s", + b->name, pcrs_strerror(hits)); + } + freez(modified_tag); + } + } + + if (b->dynamic) pcrs_free_joblist(joblist); + + /* If this tagger matched */ + if (tag != csp->client_iob->cur) + { + if (0 == size) + { + /* + * There is no technical limitation which makes + * it impossible to use empty tags, but I assume + * no one would do it intentionally. + */ + freez(tag); + log_error(LOG_LEVEL_TAGGING, + "Tagger \'%s\' created an empty tag. Ignored.", b->name); + continue; + } + + if (list_contains_item(csp->action->multi[ACTION_MULTI_SUPPRESS_TAG], tag)) + { + log_error(LOG_LEVEL_TAGGING, + "Tagger \'%s\' didn't add tag \'%s\': suppressed", + b->name, tag); + freez(tag); + continue; + } + + if (!list_contains_item(csp->tags, tag)) + { + if (JB_ERR_OK != enlist(csp->tags, tag)) + { + log_error(LOG_LEVEL_ERROR, + "Insufficient memory to add tag \'%s\', " + "based on tagger \'%s\'", + tag, b->name); + } + else + { + char *action_message; + /* + * update the action bits right away, to make + * tagging based on tags set by earlier taggers + * of the same kind possible. + */ + if (update_action_bits_for_tag(csp, tag)) + { + action_message = "Action bits updated accordingly."; + } + else + { + action_message = "No action bits update necessary."; + } + + log_error(LOG_LEVEL_TAGGING, + "Tagger \'%s\' added tag \'%s\'. %s", + b->name, tag, action_message); + } + } + else + { + /* XXX: Is this log-worthy? */ + log_error(LOG_LEVEL_TAGGING, + "Tagger \'%s\' didn't add tag \'%s\'. 
Tag already present", + b->name, tag); + } + freez(tag); + } + } + + return JB_ERR_OK; +} + + /********************************************************************* * * Function : get_url_actions @@ -2890,6 +3061,24 @@ int client_body_filters_enabled(const struct current_action_spec *action) } +/********************************************************************* + * + * Function : client_body_taggers_enabled + * + * Description : Checks whether there are any client body taggers + * enabled for the current request. + * + * Parameters : + * 1 : action = Action spec to check. + * + * Returns : TRUE for yes, FALSE otherwise + * + *********************************************************************/ +int client_body_taggers_enabled(const struct current_action_spec *action) +{ + return !list_is_empty(action->multi[ACTION_MULTI_CLIENT_BODY_TAGGER]); +} + /********************************************************************* * * Function : filters_available diff --git a/filters.h b/filters.h index 829a167a..11b3e85c 100644 --- a/filters.h +++ b/filters.h @@ -85,6 +85,7 @@ extern const struct forward_spec *forward_url(struct client_state *csp, */ extern char *execute_content_filters(struct client_state *csp); extern int execute_client_body_filters(struct client_state *csp, size_t *content_length); +extern jb_err execute_client_body_taggers(struct client_state *csp, size_t content_length); extern char *execute_single_pcrs_command(char *subject, const char *pcrs_command, int *hits); extern char *rewrite_url(char *old_url, const char *pcrs_command); @@ -93,6 +94,7 @@ extern pcrs_job *compile_dynamic_pcrs_job_list(const struct client_state *csp, c extern int content_requires_filtering(struct client_state *csp); extern int content_filters_enabled(const struct current_action_spec *action); extern int client_body_filters_enabled(const struct current_action_spec *action); +extern int client_body_taggers_enabled(const struct current_action_spec *action); extern int 
filters_available(const struct client_state *csp); /* diff --git a/jcc.c b/jcc.c index 81222a4f..19b2f497 100644 --- a/jcc.c +++ b/jcc.c @@ -2173,7 +2173,7 @@ static int update_client_headers(struct client_state *csp, size_t new_content_le /********************************************************************* * - * Function : can_filter_request_body + * Function : can_buffer_request_body * * Description : Checks if the current request body can be stored in * the client_iob without hitting buffer limit. @@ -2185,7 +2185,7 @@ static int update_client_headers(struct client_state *csp, size_t new_content_le * FALSE otherwise. * *********************************************************************/ -static int can_filter_request_body(const struct client_state *csp) +static int can_buffer_request_body(const struct client_state *csp) { if (!can_add_to_iob(csp->client_iob, csp->config->buffer_limit, csp->expected_client_content_length)) @@ -2210,7 +2210,7 @@ static int can_filter_request_body(const struct client_state *csp) * Parameters : * 1 : csp = Current client state (buffers, headers, etc...) * - * Returns : 0 on success, anything else is an error. + * Returns : 0 on success, 1 on error, 2 if the request got crunched. 
* *********************************************************************/ static int send_http_request(struct client_state *csp) @@ -2939,27 +2939,41 @@ static void continue_https_chat(struct client_state *csp) assert(csp->server_connection.sfd != JB_INVALID_SOCKET); if (csp->expected_client_content_length != 0 && - client_body_filters_enabled(csp->action) && - can_filter_request_body(csp)) + (client_body_filters_enabled(csp->action) || + client_body_taggers_enabled(csp->action)) && + can_buffer_request_body(csp)) { int content_modified; - size_t buffered_content_length; if (read_https_request_body(csp)) { /* XXX: handle */ return; } - buffered_content_length = csp->expected_client_content_length; - content_modified = execute_client_body_filters(csp, &buffered_content_length); - if ((content_modified == 1) && - (buffered_content_length != csp->expected_client_content_length) && - update_client_headers(csp, buffered_content_length)) + if (client_body_taggers_enabled(csp->action)) { - log_error(LOG_LEVEL_HEADER, "Failed to update client headers " - "after filtering the encrypted client body"); - /* XXX: handle */ - return; + execute_client_body_taggers(csp, csp->expected_client_content_length); + if (crunch_response_triggered(csp, crunchers_all)) + { + /* + * Yes. The client got the crunch response and we're done here. 
+ */ + return; + } + } + if (client_body_filters_enabled(csp->action)) + { + size_t modified_content_length = csp->expected_client_content_length; + content_modified = execute_client_body_filters(csp, + &modified_content_length); + if ((content_modified == 1) && + (modified_content_length != csp->expected_client_content_length) && + update_client_headers(csp, modified_content_length)) + { + /* XXX: Send error response */ + log_error(LOG_LEVEL_HEADER, "Error updating client headers"); + return; + } } csp->expected_client_content_length = 0; } @@ -4362,7 +4376,9 @@ static void chat(struct client_state *csp) /* If we need to apply client body filters, buffer the whole request now. */ if (csp->expected_client_content_length != 0 && - client_body_filters_enabled(csp->action) && can_filter_request_body(csp)) + (client_body_filters_enabled(csp->action) || + client_body_taggers_enabled(csp->action)) && + can_buffer_request_body(csp)) { int content_modified; size_t modified_content_length; @@ -4370,8 +4386,8 @@ static void chat(struct client_state *csp) #ifdef FEATURE_HTTPS_INSPECTION if (client_use_ssl(csp) && read_https_request_body(csp)) { - log_error(LOG_LEVEL_ERROR, - "Failed to buffer the encrypted request body to apply filters"); + log_error(LOG_LEVEL_ERROR, "Failed to buffer the encrypted " + "request body to apply filters or taggers."); log_error(LOG_LEVEL_CLF, "%s - - [%T] \"%s\" 400 0", csp->ip_addr_str, csp->http->cmd); @@ -4387,7 +4403,7 @@ static void chat(struct client_state *csp) if (read_http_request_body(csp)) { log_error(LOG_LEVEL_ERROR, - "Failed to buffer the request body to apply filters"); + "Failed to buffer the request body to apply filters or taggers,"); log_error(LOG_LEVEL_CLF, "%s - - [%T] \"%s\" 400 0", csp->ip_addr_str, csp->http->cmd); @@ -4396,16 +4412,30 @@ static void chat(struct client_state *csp) return; } - modified_content_length = csp->expected_client_content_length; - content_modified = execute_client_body_filters(csp, - 
&modified_content_length); - if ((content_modified == 1) && - (modified_content_length != csp->expected_client_content_length) && - update_client_headers(csp, modified_content_length)) + if (client_body_taggers_enabled(csp->action)) { - /* XXX: Send error response */ - log_error(LOG_LEVEL_HEADER, "Error updating client headers"); - return; + execute_client_body_taggers(csp, csp->expected_client_content_length); + if (crunch_response_triggered(csp, crunchers_all)) + { + /* + * Yes. The client got the crunch response and we're done here. + */ + return; + } + } + if (client_body_filters_enabled(csp->action)) + { + modified_content_length = csp->expected_client_content_length; + content_modified = execute_client_body_filters(csp, + &modified_content_length); + if ((content_modified == 1) && + (modified_content_length != csp->expected_client_content_length) && + update_client_headers(csp, modified_content_length)) + { + /* XXX: Send error response */ + log_error(LOG_LEVEL_HEADER, "Error updating client headers"); + return; + } } csp->expected_client_content_length = 0; } @@ -4650,7 +4680,13 @@ static void chat(struct client_state *csp) #endif )) { - if (send_http_request(csp)) + int status = send_http_request(csp); + if (status == 2) + { + /* The request got crunched, a response has been delivered. 
*/ + return; + } + if (status != 0) { rsp = error_response(csp, "connect-failed"); if (rsp) diff --git a/loaders.c b/loaders.c index 5f21fd0b..a9820df5 100644 --- a/loaders.c +++ b/loaders.c @@ -1168,6 +1168,10 @@ int load_one_re_filterfile(struct client_state *csp, int fileid) { new_filter = FT_CLIENT_BODY_FILTER; } + else if (strncmp(buf, "CLIENT-BODY-TAGGER:", 19) == 0) + { + new_filter = FT_CLIENT_BODY_TAGGER; + } /* * If this is the head of a new filter block, make it a @@ -1190,6 +1194,10 @@ int load_one_re_filterfile(struct client_state *csp, int fileid) { new_bl->name = chomp(buf + 19); } + else if (new_filter == FT_CLIENT_BODY_TAGGER) + { + new_bl->name = chomp(buf + 19); + } else { new_bl->name = chomp(buf + 21); diff --git a/project.h b/project.h index 527a8688..e8bb6788 100644 --- a/project.h +++ b/project.h @@ -642,8 +642,10 @@ struct iob #define ACTION_MULTI_SUPPRESS_TAG 7 /** Index into current_action_spec::multi[] for client body filters to apply. */ #define ACTION_MULTI_CLIENT_BODY_FILTER 8 +/** Index into current_action_spec::multi[] for client body taggers to apply. */ +#define ACTION_MULTI_CLIENT_BODY_TAGGER 9 /** Number of multi-string actions. */ -#define ACTION_MULTI_COUNT 9 +#define ACTION_MULTI_COUNT 10 /** @@ -1296,9 +1298,10 @@ enum filter_type FT_SERVER_HEADER_TAGGER = 4, FT_SUPPRESS_TAG = 5, FT_CLIENT_BODY_FILTER = 6, - FT_ADD_HEADER = 7, + FT_CLIENT_BODY_TAGGER = 7, + FT_ADD_HEADER = 8, #ifdef FEATURE_EXTERNAL_FILTERS - FT_EXTERNAL_CONTENT_FILTER = 8, + FT_EXTERNAL_CONTENT_FILTER = 9, #endif FT_INVALID_FILTER = 42, };