X-Git-Url: http://www.privoxy.org/gitweb/?p=privoxy.git;a=blobdiff_plain;f=filters.c;h=baaaea5f44051d5f7f6858223855b8462098dfaf;hp=ef9661ec65d2fffadd18af43dc64f2ddad89f31d;hb=caacb81062e2e3152231c09c9da62a31e4efc170;hpb=e857ebe8f75d1104c664e090b4ba46b0e8b9a288 diff --git a/filters.c b/filters.c index ef9661ec..baaaea5f 100644 --- a/filters.c +++ b/filters.c @@ -66,6 +66,9 @@ #ifdef FEATURE_CLIENT_TAGS #include "client-tags.h" #endif +#ifdef FEATURE_HTTPS_INSPECTION +#include "ssl.h" +#endif #ifdef _WIN32 #include "win32.h" @@ -1194,7 +1197,6 @@ struct http_response *redirect_url(struct client_state *csp) */ char * redirect_mode; #endif /* def FEATURE_FAST_REDIRECTS */ - char *old_url = NULL; char *new_url = NULL; char *redirection_string; @@ -1220,8 +1222,36 @@ struct http_response *redirect_url(struct client_state *csp) if (*redirection_string == 's') { - old_url = csp->http->url; - new_url = rewrite_url(old_url, redirection_string); + char *requested_url; + +#ifdef FEATURE_HTTPS_INSPECTION + if (client_use_ssl(csp)) + { + jb_err err; + + requested_url = strdup_or_die("https://"); + err = string_append(&requested_url, csp->http->hostport); + if (!err) err = string_append(&requested_url, csp->http->path); + if (err) + { + log_error(LOG_LEVEL_FATAL, + "Failed to rebuild URL 'https://%s%s'", + csp->http->hostport, csp->http->path); + } + } + else +#endif + { + requested_url = csp->http->url; + } + new_url = rewrite_url(requested_url, redirection_string); +#ifdef FEATURE_HTTPS_INSPECTION + if (requested_url != csp->http->url) + { + assert(client_use_ssl(csp)); + freez(requested_url); + } +#endif } else { @@ -1235,6 +1265,8 @@ struct http_response *redirect_url(struct client_state *csp) #ifdef FEATURE_FAST_REDIRECTS if ((csp->action->flags & ACTION_FAST_REDIRECTS)) { + char *old_url; + redirect_mode = csp->action->string[ACTION_STRING_FAST_REDIRECTS]; /* @@ -1245,19 +1277,7 @@ struct http_response *redirect_url(struct client_state *csp) new_url = get_last_url(old_url, 
redirect_mode); freez(old_url); } - - /* - * Disable redirect checkers, so that they - * will be only run more than once if the user - * also enables them through tags. - * - * From a performance point of view - * it doesn't matter, but the duplicated - * log messages are annoying. - */ - csp->action->flags &= ~ACTION_FAST_REDIRECTS; #endif /* def FEATURE_FAST_REDIRECTS */ - csp->action->flags &= ~ACTION_REDIRECT; /* Did any redirect action trigger? */ if (new_url) @@ -1537,25 +1557,34 @@ struct re_filterfile_spec *get_filter(const struct client_state *csp, /********************************************************************* * - * Function : pcrs_filter_response + * Function : pcrs_filter_impl * * Description : Execute all text substitutions from all applying - * +filter actions on the text buffer that's been - * accumulated in csp->iob->buf. + * (based on filter_response_body value) +filter + * or +client_body_filter actions on the given buffer. * * Parameters : * 1 : csp = Current client state (buffers, headers, etc...) + * 2 : filter_response_body = when TRUE execute +filter + * actions; execute +client_body_filter actions otherwise + * 3 : data = Target data + * 4 : data_len = Target data len * * Returns : a pointer to the (newly allocated) modified buffer. * or NULL if there were no hits or something went wrong * *********************************************************************/ -static char *pcrs_filter_response(struct client_state *csp) +static char *pcrs_filter_impl(const struct client_state *csp, int filter_response_body, + const char *data, size_t *data_len) { int hits = 0; size_t size, prev_size; + const int filters_idx = + filter_response_body ? ACTION_MULTI_FILTER : ACTION_MULTI_CLIENT_BODY_FILTER; + const enum filter_type filter_type = + filter_response_body ? 
FT_CONTENT_FILTER : FT_CLIENT_BODY_FILTER; - char *old = NULL; + const char *old = NULL; char *new = NULL; pcrs_job *job; @@ -1565,7 +1594,7 @@ static char *pcrs_filter_response(struct client_state *csp) /* * Sanity first */ - if (csp->iob->cur >= csp->iob->eod) + if (*data_len == 0) { return(NULL); } @@ -1577,15 +1606,15 @@ static char *pcrs_filter_response(struct client_state *csp) return(NULL); } - size = (size_t)(csp->iob->eod - csp->iob->cur); - old = csp->iob->cur; + size = *data_len; + old = data; /* - * For all applying +filter actions, look if a filter by that + * For all applying actions, look if a filter by that * name exists and if yes, execute it's pcrs_joblist on the * buffer. */ - for (filtername = csp->action->multi[ACTION_MULTI_FILTER]->first; + for (filtername = csp->action->multi[filters_idx]->first; filtername != NULL; filtername = filtername->next) { int current_hits = 0; /* Number of hits caused by this filter */ @@ -1593,7 +1622,7 @@ static char *pcrs_filter_response(struct client_state *csp) int job_hits = 0; /* How many hits the current job caused */ pcrs_job *joblist; - b = get_filter(csp, filtername->str, FT_CONTENT_FILTER); + b = get_filter(csp, filtername->str, filter_type); if (b == NULL) { continue; @@ -1624,7 +1653,7 @@ static char *pcrs_filter_response(struct client_state *csp) * input for the next one. 
*/ current_hits += job_hits; - if (old != csp->iob->cur) + if (old != data) { freez(old); } @@ -1656,9 +1685,18 @@ static char *pcrs_filter_response(struct client_state *csp) if (b->dynamic) pcrs_free_joblist(joblist); - log_error(LOG_LEVEL_RE_FILTER, - "filtering %s%s (size %lu) with \'%s\' produced %d hits (new size %lu).", - csp->http->hostport, csp->http->path, prev_size, b->name, current_hits, size); + if (filter_response_body) + { + log_error(LOG_LEVEL_RE_FILTER, + "filtering %s%s (size %lu) with \'%s\' produced %d hits (new size %lu).", + csp->http->hostport, csp->http->path, prev_size, b->name, current_hits, size); + } + else + { + log_error(LOG_LEVEL_RE_FILTER, "filtering request body from client %s " + "(size %lu) with \'%s\' produced %d hits (new size %lu).", + csp->ip_addr_str, prev_size, b->name, current_hits, size); + } #ifdef FEATURE_EXTENDED_STATISTICS update_filter_statistics(b->name, current_hits); #endif @@ -1667,11 +1705,11 @@ static char *pcrs_filter_response(struct client_state *csp) /* * If there were no hits, destroy our copy and let - * chat() use the original in csp->iob + * chat() use the original content */ if (!hits) { - if (old != csp->iob->cur && old != new) + if (old != data && old != new) { freez(old); } @@ -1679,12 +1717,50 @@ static char *pcrs_filter_response(struct client_state *csp) return(NULL); } - csp->flags |= CSP_FLAG_MODIFIED; - csp->content_length = size; - clear_iob(csp->iob); - + *data_len = size; return(new); +} + +/********************************************************************* + * + * Function : pcrs_filter_response_body + * + * Description : Execute all text substitutions from all applying + * +filter actions on the text buffer that's been + * accumulated in csp->iob->buf. + * + * Parameters : + * 1 : csp = Current client state (buffers, headers, etc...) + * + * Returns : a pointer to the (newly allocated) modified buffer. 
+ * or NULL if there were no hits or something went wrong + * + *********************************************************************/ +static char *pcrs_filter_response_body(struct client_state *csp) +{ + size_t size = (size_t)(csp->iob->eod - csp->iob->cur); + + char *new = NULL; + + /* + * Sanity first + */ + if (csp->iob->cur >= csp->iob->eod) + { + return NULL; + } + + new = pcrs_filter_impl(csp, TRUE, csp->iob->cur, &size); + + if (new != NULL) + { + csp->flags |= CSP_FLAG_MODIFIED; + csp->content_length = size; + clear_iob(csp->iob); + } + + return new; } @@ -1915,6 +1991,28 @@ static char *execute_external_filter(const struct client_state *csp, #endif /* def FEATURE_EXTERNAL_FILTERS */ +/********************************************************************* + * + * Function : pcrs_filter_request_body + * + * Description : Execute all text substitutions from all applying + * +client_body_filter actions on the given text buffer. + * + * Parameters : + * 1 : csp = Current client state (buffers, headers, etc...) + * 2 : data = Target data + * 3 : data_len = Target data len + * + * Returns : a pointer to the (newly allocated) modified buffer. 
+ * or NULL if there were no hits or something went wrong + * + *********************************************************************/ +static char *pcrs_filter_request_body(const struct client_state *csp, const char *data, size_t *data_len) +{ + return pcrs_filter_impl(csp, FALSE, data, data_len); +} + + /********************************************************************* * * Function : gif_deanimate_response @@ -2003,7 +2101,7 @@ static filter_function_ptr get_filter_function(const struct client_state *csp) if ((csp->content_type & CT_TEXT) && (!list_is_empty(csp->action->multi[ACTION_MULTI_FILTER]))) { - filter_function = pcrs_filter_response; + filter_function = pcrs_filter_response_body; } else if ((csp->content_type & CT_GIF) && (csp->action->flags & ACTION_DEANIMATE)) @@ -2015,6 +2113,172 @@ static filter_function_ptr get_filter_function(const struct client_state *csp) } +/********************************************************************* + * + * Function : get_bytes_to_next_chunk_start + * + * Description : Returns the number of bytes to the start of the + * next chunk in the buffer. + * + * Parameters : + * 1 : buffer = Pointer to the text buffer + * 2 : size = Number of bytes in the buffer. + * 3 : offset = Where to expect the beginning of the next chunk. + * + * Returns : -1 if the size can't be determined or data is missing, + * otherwise the number of bytes to the start of the next chunk + * or 0 if the last chunk has been fully buffered. + * + *********************************************************************/ +static int get_bytes_to_next_chunk_start(char *buffer, size_t size, size_t offset) +{ + char *chunk_start; + char *p; + unsigned int chunk_size = 0; + int bytes_to_skip; + + if (size <= offset || size < 5) + { + /* + * Not enough bytes buffered to figure + * out the size of the next chunk.
+ */ + return -1; + } + + chunk_start = buffer + offset; + + p = strstr(chunk_start, "\r\n"); + if (NULL == p) + { + /* + * The line with the chunk-size hasn't been completely received + * yet (or is invalid). + */ + log_error(LOG_LEVEL_RE_FILTER, + "Not enough or invalid data in buffer in chunk size line."); + return -1; + } + + if (sscanf(chunk_start, "%x", &chunk_size) != 1) + { + /* XXX: Write test case to trigger this. */ + log_error(LOG_LEVEL_ERROR, "Failed to parse chunk size. " + "Size: %lu, offset: %lu. Chunk size start: %N", size, offset, + (size - offset), chunk_start); + return -1; + } + + /* + * To get to the start of the next chunk size we have to skip + * the line with the current chunk size followed by "\r\n" followed + * by the actual data and another "\r\n" following the data. + */ + bytes_to_skip = (int)(p - chunk_start) + 2 + (int)chunk_size + 2; + + if (bytes_to_skip <= 0) + { + log_error(LOG_LEVEL_ERROR, + "Failed to figure out chunk offset. %u and %d seem dubious.", + chunk_size, bytes_to_skip); + return -1; + } + if (chunk_size == 0) + { + if (bytes_to_skip <= (size - offset)) + { + return 0; + } + else + { + log_error(LOG_LEVEL_INFO, + "Last chunk detected but we're still missing data."); + return -1; + } + } + + return bytes_to_skip; +} + + +/********************************************************************* + * + * Function : get_bytes_missing_from_chunked_data + * + * Description : Figures out how many bytes of data we need to get + * to the start of the next chunk of data (XXX: terminology). + * Due to the nature of chunk-encoded data we can only see + * how much data is missing according to the last chunk size + * buffered. + * + * Parameters : + * 1 : buffer = Pointer to the text buffer + * 2 : size = Number of bytes in the buffer. + * 3 : offset = Where to expect the beginning of the next chunk. + * + * Returns : -1 if the data can't be parsed (yet), + * 0 if the buffer is complete or a + * number of bytes that is missing.
+ * + *********************************************************************/ +int get_bytes_missing_from_chunked_data(char *buffer, size_t size, size_t offset) +{ + int ret = -1; + int last_valid_offset = -1; + + if (size < offset || size < 5) + { + /* Not enough data buffered yet */ + return -1; + } + + do + { + ret = get_bytes_to_next_chunk_start(buffer, size, offset); + if (ret == -1) + { + return last_valid_offset; + } + if (ret == 0) + { + return 0; + } + if (offset != 0) + { + last_valid_offset = (int)offset; + } + offset += (size_t)ret; + } while (offset < size); + + return (int)offset; + +} + + +/********************************************************************* + * + * Function : chunked_data_is_complete + * + * Description : Detects if a buffer with chunk-encoded data looks + * complete. + * + * Parameters : + * 1 : buffer = Pointer to the text buffer + * 2 : size = Number of bytes in the buffer. + * 3 : offset = Where to expect the beginning of the + * first complete chunk. + * + * Returns : TRUE if it looks like the data is complete, + * FALSE otherwise. + * + *********************************************************************/ +int chunked_data_is_complete(char *buffer, size_t size, size_t offset) +{ + return (0 == get_bytes_missing_from_chunked_data(buffer, size, offset)); + +} + + /********************************************************************* * * Function : remove_chunked_transfer_coding @@ -2082,7 +2346,9 @@ static jb_err remove_chunked_transfer_coding(char *buffer, size_t *size) */ if (NULL == (from_p = strstr(from_p, "\r\n"))) { - log_error(LOG_LEVEL_ERROR, "Parse error while stripping \"chunked\" transfer coding"); + log_error(LOG_LEVEL_ERROR, + "Failed to strip \"chunked\" transfer coding.
" + "Line with chunk size doesn't seem to end properly."); return JB_ERR_PARSE; } from_p += 2; @@ -2097,7 +2363,8 @@ static jb_err remove_chunked_transfer_coding(char *buffer, size_t *size) if (from_p + chunksize >= end_of_buffer) { log_error(LOG_LEVEL_ERROR, - "End of chunk is beyond the end of the buffer."); + "Failed to decode content for filtering. " + "One chunk end is beyond the end of the buffer."); return JB_ERR_PARSE; } @@ -2249,8 +2516,10 @@ char *execute_content_filters(struct client_state *csp) if (JB_ERR_OK != prepare_for_filtering(csp)) { /* - * failed to de-chunk or decompress. + * We failed to de-chunk or decompress, don't accept + * another request on the client connection. */ + csp->flags &= ~CSP_FLAG_CLIENT_CONNECTION_KEEP_ALIVE; return NULL; } @@ -2301,6 +2570,223 @@ char *execute_content_filters(struct client_state *csp) } +/********************************************************************* + * + * Function : execute_client_body_filters + * + * Description : Executes client body filters for the request that is buffered + * in the client_iob. The client_iob is updated with the filtered + * content. + * + * Parameters : + * 1 : csp = Current client state (buffers, headers, etc...) + * 2 : content_length = content length. Upon successful filtering + * the passed value is updated with the new content length. + * + * Returns : 1 if the content has been filtered. 0 if it hasn't. + * + *********************************************************************/ +int execute_client_body_filters(struct client_state *csp, size_t *content_length) +{ + char *filtered_content; + + assert(client_body_filters_enabled(csp->action)); + + if (content_length == NULL || *content_length == 0) + { + /* + * No length pointer or no content, no filtering necessary.
+ */ + return 0; + } + + filtered_content = pcrs_filter_request_body(csp, csp->client_iob->cur, content_length); + if (filtered_content != NULL) + { + freez(csp->client_iob->buf); + csp->client_iob->buf = filtered_content; + csp->client_iob->cur = csp->client_iob->buf; + csp->client_iob->eod = csp->client_iob->cur + *content_length; + csp->client_iob->size = *content_length; + + return 1; + } + + return 0; +} + + +/********************************************************************* + * + * Function : execute_client_body_taggers + * + * Description : Executes client body taggers for the request that is + * buffered in the client_iob. + * XXX: Lots of code shared with header_tagger + * + * Parameters : + * 1 : csp = Current client state (buffers, headers, etc...) + * 2 : content_length = content length. + * + * Returns : JB_ERR_OK in all cases; tagger problems are only logged. + * + *********************************************************************/ +jb_err execute_client_body_taggers(struct client_state *csp, size_t content_length) +{ + enum filter_type wanted_filter_type = FT_CLIENT_BODY_TAGGER; + int multi_action_index = ACTION_MULTI_CLIENT_BODY_TAGGER; + pcrs_job *job; + + struct re_filterfile_spec *b; + struct list_entry *tag_name; + + assert(client_body_taggers_enabled(csp->action)); + + if (content_length == 0) + { + /* + * No content, no tagging necessary. + */ + return JB_ERR_OK; + } + + log_error(LOG_LEVEL_INFO, "Got to execute tagger on %N", + content_length, csp->client_iob->cur); + + if (list_is_empty(csp->action->multi[multi_action_index]) + || filters_available(csp) == FALSE) + { + /* Return early if no taggers apply or if none are available.
*/ + return JB_ERR_OK; + } + + /* Execute all applying taggers */ + for (tag_name = csp->action->multi[multi_action_index]->first; + NULL != tag_name; tag_name = tag_name->next) + { + char *modified_tag = NULL; + char *tag = csp->client_iob->cur; + size_t size = content_length; + pcrs_job *joblist; + + b = get_filter(csp, tag_name->str, wanted_filter_type); + if (b == NULL) + { + continue; + } + + joblist = b->joblist; + + if (b->dynamic) joblist = compile_dynamic_pcrs_job_list(csp, b); + + if (NULL == joblist) + { + log_error(LOG_LEVEL_TAGGING, + "Tagger %s has empty joblist. Nothing to do.", b->name); + continue; + } + + /* execute their pcrs_joblist on the body. */ + for (job = joblist; NULL != job; job = job->next) + { + const int hits = pcrs_execute(job, tag, size, &modified_tag, &size); + + if (0 < hits) + { + /* Success, continue with the modified version. */ + if (tag != csp->client_iob->cur) + { + freez(tag); + } + tag = modified_tag; + } + else + { + /* Tagger doesn't match */ + if (0 > hits) + { + /* Regex failure, log it but continue anyway. */ + log_error(LOG_LEVEL_ERROR, + "Problems with tagger \'%s\': %s", + b->name, pcrs_strerror(hits)); + } + freez(modified_tag); + } + } + + if (b->dynamic) pcrs_free_joblist(joblist); + + /* If this tagger matched */ + if (tag != csp->client_iob->cur) + { + if (0 == size) + { + /* + * There is no technical limitation which makes + * it impossible to use empty tags, but I assume + * no one would do it intentionally. + */ + freez(tag); + log_error(LOG_LEVEL_TAGGING, + "Tagger \'%s\' created an empty tag. 
Ignored.", b->name); + continue; + } + + if (list_contains_item(csp->action->multi[ACTION_MULTI_SUPPRESS_TAG], tag)) + { + log_error(LOG_LEVEL_TAGGING, + "Tagger \'%s\' didn't add tag \'%s\': suppressed", + b->name, tag); + freez(tag); + continue; + } + + if (!list_contains_item(csp->tags, tag)) + { + if (JB_ERR_OK != enlist(csp->tags, tag)) + { + log_error(LOG_LEVEL_ERROR, + "Insufficient memory to add tag \'%s\', " + "based on tagger \'%s\'", + tag, b->name); + } + else + { + char *action_message; + /* + * update the action bits right away, to make + * tagging based on tags set by earlier taggers + * of the same kind possible. + */ + if (update_action_bits_for_tag(csp, tag)) + { + action_message = "Action bits updated accordingly."; + } + else + { + action_message = "No action bits update necessary."; + } + + log_error(LOG_LEVEL_TAGGING, + "Tagger \'%s\' added tag \'%s\'. %s", + b->name, tag, action_message); + } + } + else + { + /* XXX: Is this log-worthy? */ + log_error(LOG_LEVEL_TAGGING, + "Tagger \'%s\' didn't add tag \'%s\'. Tag already present", + b->name, tag); + } + freez(tag); + } + } + + return JB_ERR_OK; +} + + /********************************************************************* * * Function : get_url_actions @@ -2724,6 +3210,43 @@ int content_filters_enabled(const struct current_action_spec *action) } +/********************************************************************* + * + * Function : client_body_filters_enabled + * + * Description : Checks whether there are any client body filters + * enabled for the current request. + * + * Parameters : + * 1 : action = Action spec to check. 
+ * + * Returns : TRUE for yes, FALSE otherwise + * + *********************************************************************/ +int client_body_filters_enabled(const struct current_action_spec *action) +{ + return !list_is_empty(action->multi[ACTION_MULTI_CLIENT_BODY_FILTER]); +} + + +/********************************************************************* + * + * Function : client_body_taggers_enabled + * + * Description : Checks whether there are any client body taggers + * enabled for the current request. + * + * Parameters : + * 1 : action = Action spec to check. + * + * Returns : TRUE for yes, FALSE otherwise + * + *********************************************************************/ +int client_body_taggers_enabled(const struct current_action_spec *action) +{ + return !list_is_empty(action->multi[ACTION_MULTI_CLIENT_BODY_TAGGER]); +} + /********************************************************************* * * Function : filters_available @@ -2925,14 +3448,14 @@ void register_block_reason_for_statistics(const char *block_reason) { struct block_statistics_entry *entry; - privoxy_mutex_lock(&block_statistics_mutex); + privoxy_mutex_lock(&block_reason_statistics_mutex); if (block_statistics == NULL) { block_statistics = zalloc_or_die(sizeof(struct block_statistics_entry)); entry = block_statistics; entry->block_reason = strdup_or_die(block_reason); - privoxy_mutex_unlock(&block_statistics_mutex); + privoxy_mutex_unlock(&block_reason_statistics_mutex); return; } entry = block_statistics; @@ -2952,7 +3475,7 @@ void register_block_reason_for_statistics(const char *block_reason) entry = entry->next; } - privoxy_mutex_unlock(&block_statistics_mutex); + privoxy_mutex_unlock(&block_reason_statistics_mutex); } @@ -2973,7 +3496,7 @@ static void increment_block_reason_counter(const char *block_reason) { struct block_statistics_entry *entry; - privoxy_mutex_lock(&block_statistics_mutex); + privoxy_mutex_lock(&block_reason_statistics_mutex); entry = block_statistics; while 
(entry != NULL) @@ -2986,7 +3509,7 @@ static void increment_block_reason_counter(const char *block_reason) entry = entry->next; } - privoxy_mutex_unlock(&block_statistics_mutex); + privoxy_mutex_unlock(&block_reason_statistics_mutex); } @@ -3009,7 +3532,7 @@ void get_block_reason_count(const char *block_reason, unsigned long long *count) { struct block_statistics_entry *entry; - privoxy_mutex_lock(&block_statistics_mutex); + privoxy_mutex_lock(&block_reason_statistics_mutex); entry = block_statistics; while (entry != NULL) @@ -3022,7 +3545,7 @@ void get_block_reason_count(const char *block_reason, unsigned long long *count) entry = entry->next; } - privoxy_mutex_unlock(&block_statistics_mutex); + privoxy_mutex_unlock(&block_reason_statistics_mutex); }