X-Git-Url: http://www.privoxy.org/gitweb/?a=blobdiff_plain;f=parsers.c;h=0bcdc422009e4f8ec3b8888e92ca0c549a6023af;hb=204e381da6f1ef179417c489810106a63d91bb1a;hp=16157916d0ebcfd2e3dba8221786d3e52aa1d4d5;hpb=9ac1b59093e80ebf3aa6640cb38cb60d6ca0b3ac;p=privoxy.git diff --git a/parsers.c b/parsers.c index 16157916..0bcdc422 100644 --- a/parsers.c +++ b/parsers.c @@ -1,4 +1,4 @@ -const char parsers_rcs[] = "$Id: parsers.c,v 1.127 2008/05/10 13:23:38 fabiankeil Exp $"; +const char parsers_rcs[] = "$Id: parsers.c,v 1.136 2008/05/26 16:02:24 fabiankeil Exp $"; /********************************************************************* * * File : $Source: /cvsroot/ijbswa/current/parsers.c,v $ @@ -44,6 +44,39 @@ const char parsers_rcs[] = "$Id: parsers.c,v 1.127 2008/05/10 13:23:38 fabiankei * * Revisions : * $Log: parsers.c,v $ + * Revision 1.136 2008/05/26 16:02:24 fabiankeil + * s@Insufficent@Insufficient@ + * + * Revision 1.135 2008/05/21 20:12:10 fabiankeil + * The whole point of strclean() is to modify the + * first parameter, so don't mark it immutable, + * even though the compiler lets us get away with it. + * + * Revision 1.134 2008/05/21 19:27:25 fabiankeil + * As the wafer actions are gone, we can stop including encode.h. + * + * Revision 1.133 2008/05/21 15:50:47 fabiankeil + * Ditch cast from (char **) to (char **). + * + * Revision 1.132 2008/05/21 15:47:14 fabiankeil + * Streamline sed()'s prototype and declare + * the header parse and add structures static. + * + * Revision 1.131 2008/05/20 20:13:30 fabiankeil + * Factor update_server_headers() out of sed(), ditch the + * first_run hack and make server_patterns_light static. + * + * Revision 1.130 2008/05/19 17:18:04 fabiankeil + * Wrap memmove() calls in string_move() + * to document the purpose in one place. + * + * Revision 1.129 2008/05/17 14:02:07 fabiankeil + * Normalize linear header white space. + * + * Revision 1.128 2008/05/16 16:39:03 fabiankeil + * If a header is split across multiple lines, + * merge them to a single line before parsing them. + * * Revision 1.127 2008/05/10 13:23:38 fabiankeil * Don't provide get_header() with the whole client state * structure when it only needs access to csp->iob. @@ -814,7 +847,6 @@ const char parsers_rcs[] = "$Id: parsers.c,v 1.127 2008/05/10 13:23:38 fabiankei #endif /* def FEATURE_PTHREAD */ #include "list.h" #include "parsers.h" -#include "encode.h" #include "ssplit.h" #include "errlog.h" #include "jbsockets.h" @@ -886,7 +918,22 @@ static jb_err create_fake_referrer(char **header, const char *fake_referrer); static jb_err handle_conditional_hide_referrer_parameter(char **header, const char *host, const int parameter_conditional_block); -const struct parsers client_patterns[] = { +/* + * List of functions to run on a list of headers. + */ +struct parsers +{ + /** The header prefix to match */ + const char *str; + + /** The length of the prefix to match */ + const size_t len; + + /** The function to apply to this line */ + const parser_func_ptr parser; +}; + +static const struct parsers client_patterns[] = { { "referer:", 8, client_referrer }, { "user-agent:", 11, client_uagent }, { "ua-", 3, client_ua }, @@ -912,7 +959,7 @@ const struct parsers client_patterns[] = { { NULL, 0, NULL } }; -const struct parsers server_patterns[] = { +static const struct parsers server_patterns[] = { { "HTTP/", 5, server_http }, { "set-cookie:", 11, server_set_cookie }, { "connection:", 11, connection }, @@ -928,16 +975,7 @@ const struct parsers server_patterns[] = { { NULL, 0, NULL } }; -const struct parsers server_patterns_light[] = { - { "Content-Length:", 15, server_content_length }, - { "Transfer-Encoding:", 18, server_transfer_coding }, -#ifdef FEATURE_ZLIB - { "Content-Encoding:", 17, server_content_encoding }, -#endif /* def FEATURE_ZLIB */ - { NULL, 0, NULL } -}; - -const add_header_func_ptr add_client_headers[] = { +static const add_header_func_ptr add_client_headers[] = { client_host_adder, client_xtra_adder, /* Temporarily disabled: client_accept_encoding_adder, */ @@ -945,7 +983,7 @@ const add_header_func_ptr add_client_headers[] = { NULL }; -const add_header_func_ptr add_server_headers[] = { +static const add_header_func_ptr add_server_headers[] = { connection_close_adder, NULL }; @@ -1437,6 +1475,102 @@ jb_err decompress_iob(struct client_state *csp) #endif /* defined(FEATURE_ZLIB) */ +/********************************************************************* + * + * Function : string_move + * + * Description : memmove wrapper to move the last part of a string + * towards the beginning, overwriting the part in + * the middle. strlcpy() can't be used here as the + * strings overlap. + * + * Parameters : + * 1 : dst = Destination to overwrite + * 2 : src = Source to move. + * + * Returns : N/A + * + *********************************************************************/ +static void string_move(char *dst, char *src) +{ + assert(dst < src); + + /* +1 to copy the terminating nul as well. */ + memmove(dst, src, strlen(src)+1); +} + + +/********************************************************************* + * + * Function : normalize_lws + * + * Description : Reduces unquoted linear white space in headers + * to a single space in accordance with RFC 2616 2.2. + * This simplifies parsing and filtering later on. + * + * XXX: Remove log messages before + * the next stable release? + * + * Parameters : + * 1 : header = A header with linear white space to reduce. + * + * Returns : N/A + * + *********************************************************************/ +static void normalize_lws(char *header) +{ + char *p = header; + + while (*p != '\0') + { + if (ijb_isspace(*p) && ijb_isspace(*(p+1))) + { + char *q = p+1; + + while (ijb_isspace(*q)) + { + q++; + } + log_error(LOG_LEVEL_HEADER, "Reducing white space in '%s'", header); + string_move(p+1, q); + } + + if (*p == '\t') + { + log_error(LOG_LEVEL_HEADER, + "Converting tab to space in '%s'", header); + *p = ' '; + } + else if (*p == '"') + { + char *end_of_token = strstr(p+1, "\""); + + if (NULL != end_of_token) + { + /* Don't mess with quoted text. */ + p = end_of_token; + } + else + { + log_error(LOG_LEVEL_HEADER, + "Ignoring single quote in '%s'", header); + } + } + p++; + } + + p = strchr(header, ':'); + if ((p != NULL) && (p != header) && ijb_isspace(*(p-1))) + { + /* + * There's still space before the colon. + * We don't want it. + */ + string_move(p-1, p); + } +} + + /********************************************************************* * * Function : get_header @@ -1503,6 +1637,8 @@ char *get_header(struct iob *iob) } } + normalize_lws(header); + return header; } @@ -1653,79 +1789,106 @@ static jb_err scan_headers(struct client_state *csp) * As a side effect it frees the space used by the original * header lines. * - * XXX: should be split to remove the first_run hack. - * * Parameters : - * 1 : pats = list of patterns to match against headers - * 2 : more_headers = list of functions to add more - * headers (client or server) - * 3 : csp = Current client state (buffers, headers, etc...) + * 1 : csp = Current client state (buffers, headers, etc...) + * 2 : filter_server_headers = Boolean to switch between + * server and header filtering. * * Returns : JB_ERR_OK in case off success, or * JB_ERR_MEMORY on out-of-memory error. * *********************************************************************/ -jb_err sed(const struct parsers pats[], - const add_header_func_ptr more_headers[], - struct client_state *csp) +jb_err sed(struct client_state *csp, int filter_server_headers) { + /* XXX: use more descriptive names. */ struct list_entry *p; const struct parsers *v; const add_header_func_ptr *f; jb_err err = JB_ERR_OK; - int first_run; - - /* - * If filtering is enabled, sed is run twice, - * but most of the work needs to be done only once. - */ - first_run = (more_headers != NULL ) ? 1 : 0; - if (first_run) /* Parse and print */ + if (filter_server_headers) + { + v = server_patterns; + f = add_server_headers; + } + else { - scan_headers(csp); + v = client_patterns; + f = add_client_headers; + } + + scan_headers(csp); - for (v = pats; (err == JB_ERR_OK) && (v->str != NULL) ; v++) + while ((err == JB_ERR_OK) && (v->str != NULL)) + { + for (p = csp->headers->first; (err == JB_ERR_OK) && (p != NULL); p = p->next) { - for (p = csp->headers->first; (err == JB_ERR_OK) && (p != NULL) ; p = p->next) - { - /* Header crunch()ed in previous run? -> ignore */ - if (p->str == NULL) continue; + /* Header crunch()ed in previous run? -> ignore */ + if (p->str == NULL) continue; - /* Does the current parser handle this header? */ - if ((strncmpic(p->str, v->str, v->len) == 0) || (v->len == CHECK_EVERY_HEADER_REMAINING)) - { - err = v->parser(csp, (char **)&(p->str)); - } + /* Does the current parser handle this header? */ + if ((strncmpic(p->str, v->str, v->len) == 0) || + (v->len == CHECK_EVERY_HEADER_REMAINING)) + { + err = v->parser(csp, &(p->str)); } } - /* place any additional headers on the csp->headers list */ - for (f = more_headers; (err == JB_ERR_OK) && (*f) ; f++) - { - err = (*f)(csp); - } + v++; } - else /* Parse only */ + + /* place additional headers on the csp->headers list */ + while ((err == JB_ERR_OK) && (*f)) { - /* - * The second run is only needed if the body was modified - * and the content-lenght has changed. - */ - if (strncmpic(csp->http->cmd, "HEAD", 4)) + err = (*f)(csp); + f++; + } + + return err; +} + + +/********************************************************************* + * + * Function : update_server_headers + * + * Description : Updates server headers after the body has been modified. + * + * Parameters : + * 1 : csp = Current client state (buffers, headers, etc...) + * + * Returns : JB_ERR_OK in case off success, or + * JB_ERR_MEMORY on out-of-memory error. + * + *********************************************************************/ +jb_err update_server_headers(struct client_state *csp) +{ + jb_err err = JB_ERR_OK; + + static const struct parsers server_patterns_light[] = { + { "Content-Length:", 15, server_content_length }, + { "Transfer-Encoding:", 18, server_transfer_coding }, +#ifdef FEATURE_ZLIB + { "Content-Encoding:", 17, server_content_encoding }, +#endif /* def FEATURE_ZLIB */ + { NULL, 0, NULL } + }; + + if (strncmpic(csp->http->cmd, "HEAD", 4)) + { + const struct parsers *v; + struct list_entry *p; + + for (v = server_patterns_light; (err == JB_ERR_OK) && (v->str != NULL); v++) { - /*XXX: Code duplication */ - for (v = pats; (err == JB_ERR_OK) && (v->str != NULL) ; v++) + for (p = csp->headers->first; (err == JB_ERR_OK) && (p != NULL); p = p->next) { - for (p = csp->headers->first; (err == JB_ERR_OK) && (p != NULL) ; p = p->next) - { - /* Header crunch()ed in previous run? -> ignore */ - if (p->str == NULL) continue; + /* Header crunch()ed in previous run? -> ignore */ + if (p->str == NULL) continue; - /* Does the current parser handle this header? */ - if (strncmpic(p->str, v->str, v->len) == 0) - { - err = v->parser(csp, (char **)&(p->str)); - } + /* Does the current parser handle this header? */ + if (strncmpic(p->str, v->str, v->len) == 0) + { + err = v->parser(csp, (char **)&(p->str)); } } } @@ -1735,7 +1898,6 @@ jb_err sed(const struct parsers pats[], } - /********************************************************************* * * Function : header_tagger @@ -2678,7 +2840,7 @@ static jb_err server_last_modified(struct client_state *csp, char **header) if (*header == NULL) { - log_error(LOG_LEVEL_HEADER, "Insufficent memory. Last-Modified header got lost, boohoo."); + log_error(LOG_LEVEL_HEADER, "Insufficient memory. Last-Modified header got lost, boohoo."); } else { @@ -2737,21 +2899,19 @@ static jb_err server_last_modified(struct client_state *csp, char **header) if (*header == NULL) { - log_error(LOG_LEVEL_ERROR, "Insufficent memory, header crunched without replacement."); + log_error(LOG_LEVEL_ERROR, "Insufficient memory, header crunched without replacement."); return JB_ERR_MEMORY; } - if (LOG_LEVEL_HEADER & debug) /* Save cycles if the user isn't interested. */ - { - days = rtime / (3600 * 24); - hours = rtime / 3600 % 24; - minutes = rtime / 60 % 60; - seconds = rtime % 60; - - log_error(LOG_LEVEL_HEADER, "Randomized: %s (added %d da%s %d hou%s %d minut%s %d second%s", - *header, days, (days == 1) ? "y" : "ys", hours, (hours == 1) ? "r" : "rs", - minutes, (minutes == 1) ? "e" : "es", seconds, (seconds == 1) ? ")" : "s)"); - } + days = rtime / (3600 * 24); + hours = rtime / 3600 % 24; + minutes = rtime / 60 % 60; + seconds = rtime % 60; + + log_error(LOG_LEVEL_HEADER, + "Randomized: %s (added %d da%s %d hou%s %d minut%s %d second%s", + *header, days, (days == 1) ? "y" : "ys", hours, (hours == 1) ? "r" : "rs", + minutes, (minutes == 1) ? "e" : "es", seconds, (seconds == 1) ? ")" : "s)"); } else { @@ -2973,7 +3133,7 @@ static jb_err client_accept_language(struct client_state *csp, char **header) if (*header == NULL) { log_error(LOG_LEVEL_ERROR, - "Insufficent memory. Accept-Language header crunched without replacement."); + "Insufficient memory. Accept-Language header crunched without replacement."); } else { @@ -3436,20 +3596,19 @@ static jb_err client_if_modified_since(struct client_state *csp, char **header) if (*header == NULL) { - log_error(LOG_LEVEL_HEADER, "Insufficent memory, header crunched without replacement."); + log_error(LOG_LEVEL_HEADER, "Insufficient memory, header crunched without replacement."); return JB_ERR_MEMORY; } - if (LOG_LEVEL_HEADER & debug) /* Save cycles if the user isn't interested. */ - { - hours = rtime / 3600; - minutes = rtime / 60 % 60; - seconds = rtime % 60; + hours = rtime / 3600; + minutes = rtime / 60 % 60; + seconds = rtime % 60; - log_error(LOG_LEVEL_HEADER, "Randomized: %s (%s %d hou%s %d minut%s %d second%s", - *header, (negative) ? "subtracted" : "added", hours, (hours == 1) ? "r" : "rs", - minutes, (minutes == 1) ? "e" : "es", seconds, (seconds == 1) ? ")" : "s)"); - } + log_error(LOG_LEVEL_HEADER, + "Randomized: %s (%s %d hou%s %d minut%s %d second%s", + *header, (negative) ? "subtracted" : "added", hours, + (hours == 1) ? "r" : "rs", minutes, (minutes == 1) ? "e" : "es", + seconds, (seconds == 1) ? ")" : "s)"); } } } @@ -3901,7 +4060,7 @@ static jb_err server_set_cookie(struct client_state *csp, char **header) */ log_error(LOG_LEVEL_ERROR, "Can't parse \'%s\', send by %s. Unsupported time format?", cur_tag, csp->http->url); - memmove(cur_tag, next_tag, strlen(next_tag) + 1); + string_move(cur_tag, next_tag); changed = 1; } else @@ -3952,12 +4111,8 @@ static jb_err server_set_cookie(struct client_state *csp, char **header) /* * Still valid, delete expiration date by copying * the rest of the string over it. - * - * (Note that we cannot just use "strcpy(cur_tag, next_tag)", - * since the behaviour of strcpy is undefined for overlapping - * strings.) */ - memmove(cur_tag, next_tag, strlen(next_tag) + 1); + string_move(cur_tag, next_tag); /* That changed the header, need to issue a log message */ changed = 1; @@ -4004,7 +4159,7 @@ static jb_err server_set_cookie(struct client_state *csp, char **header) * Returns : Number of eliminations * *********************************************************************/ -int strclean(const char *string, const char *substring) +int strclean(char *string, const char *substring) { int hits = 0; size_t len;