X-Git-Url: http://www.privoxy.org/gitweb/?a=blobdiff_plain;f=parsers.c;h=77120cd7093e36274e8964e60b6da4199bde23ae;hb=4de8a1b332008457fc074903db5f21cc7c00ad00;hp=e2c321152e03ee76823c09f1eb2d68f9f915e3d0;hpb=505af329e2df53d96f998537d7c1137de728f7b3;p=privoxy.git diff --git a/parsers.c b/parsers.c index e2c32115..77120cd7 100644 --- a/parsers.c +++ b/parsers.c @@ -1,4 +1,4 @@ -const char parsers_rcs[] = "$Id: parsers.c,v 1.61 2006/08/14 08:25:19 fabiankeil Exp $"; +const char parsers_rcs[] = "$Id: parsers.c,v 1.79 2006/12/29 18:04:40 fabiankeil Exp $"; /********************************************************************* * * File : $Source: /cvsroot/ijbswa/current/parsers.c,v $ @@ -10,10 +10,15 @@ const char parsers_rcs[] = "$Id: parsers.c,v 1.61 2006/08/14 08:25:19 fabiankeil * `client_uagent', `client_x_forwarded', * `client_x_forwarded_adder', `client_xtra_adder', * `content_type', `crumble', `destroy_list', `enlist', - * `flush_socket', ``get_header', `sed', - * and `server_set_cookie'. - * - * Copyright : Written by and Copyright (C) 2001 the SourceForge + * `flush_socket', ``get_header', `sed', `filter_server_header' + * `filter_client_header', `filter_header', `crunch_server_header', + * `server_content_encoding', `server_content_disposition', + * `server_last_modified', `client_accept_language', + * `crunch_client_header', `client_if_modified_since', + * `client_if_none_match', `get_destination_from_headers', + * `parse_header_time' and `server_set_cookie'. + * + * Copyright : Written by and Copyright (C) 2001-2006 the SourceForge * Privoxy team. http://www.privoxy.org/ * * Based on the Internet Junkbuster originally written @@ -40,6 +45,103 @@ const char parsers_rcs[] = "$Id: parsers.c,v 1.61 2006/08/14 08:25:19 fabiankeil * * Revisions : * $Log: parsers.c,v $ + * Revision 1.79 2006/12/29 18:04:40 fabiankeil + * Fixed gcc43 conversion warnings. 
+ * + * Revision 1.78 2006/12/26 17:19:20 fabiankeil + * Bringing back the "useless" localtime() call + * I removed in revision 1.67. On some platforms + * it's necessary to prevent time zone offsets. + * + * Revision 1.77 2006/12/07 18:44:26 fabiankeil + * Rebuild request URL in get_destination_from_headers() + * to make sure redirect{pcrs command} works as expected + * for intercepted requests. + * + * Revision 1.76 2006/12/06 19:52:25 fabiankeil + * Added get_destination_from_headers(). + * + * Revision 1.75 2006/11/13 19:05:51 fabiankeil + * Make pthread mutex locking more generic. Instead of + * checking for OSX and OpenBSD, check for FEATURE_PTHREAD + * and use mutex locking unless there is an _r function + * available. Better safe than sorry. + * + * Fixes "./configure --disable-pthread" and should result + * in fewer threading-related problems on pthread-using platforms, + * but it still doesn't fix BR#1122404. + * + * Revision 1.74 2006/10/02 16:59:12 fabiankeil + * The special header "X-Filter: No" now disables + * header filtering as well. + * + * Revision 1.73 2006/09/23 13:26:38 roro + * Replace TABs by spaces in source code. + * + * Revision 1.72 2006/09/23 12:37:21 fabiankeil + * Don't print a log message every time filter_headers is + * entered or left. It only creates noise without any real + * information. + * + * Revision 1.71 2006/09/21 19:55:17 fabiankeil + * Fix +hide-if-modified-since{-n}. + * + * Revision 1.70 2006/09/08 12:06:34 fabiankeil + * Have hide-if-modified-since interpret the random + * range value as minutes instead of hours. Allows + * more fine-grained configuration. + * + * Revision 1.69 2006/09/06 16:25:51 fabiankeil + * Always have parse_header_time return a pointer + * that actually makes sense, even though we currently + * only need it to detect problems. 
+ * + * Revision 1.68 2006/09/06 10:43:32 fabiankeil + * Added config option enable-remote-http-toggle + * to specify if Privoxy should recognize special + * headers (currently only X-Filter) to change its + * behaviour. Disabled by default. + * + * Revision 1.67 2006/09/04 11:01:26 fabiankeil + * After filtering de-chunked instances, remove + * "Transfer-Encoding" header entirely instead of changing + * it to "Transfer-Encoding: identity", which is invalid. + * Thanks Michael Shields. Fixes PR 1318658. + * + * Don't use localtime in parse_header_time. An empty time struct + * is good enough, it gets overwritten by strptime anyway. + * + * Revision 1.66 2006/09/03 19:38:28 fabiankeil + * Use gmtime_r if available, fallback to gmtime with mutex + * protection for MacOSX and use vanilla gmtime for the rest. + * + * Revision 1.65 2006/08/22 10:55:56 fabiankeil + * Changed client_referrer to use the right type (size_t) for + * hostlenght and to shorten the temporary referrer string with + * '\0' instead of adding a useless line break. + * + * Revision 1.64 2006/08/17 17:15:10 fabiankeil + * - Back to timegm() using GnuPG's replacement if necessary. + * Using mktime() and localtime() could add a one-hour offset if + * the randomize factor was big enough to lead to a summer/wintertime + * switch. + * + * - Removed now-useless Privoxy 3.0.3 compatibility glue. + * + * - Moved randomization code into pick_from_range(). + * + * - Changed parse_header_time definition. + * time_t isn't guaranteed to be signed and + * if it isn't, -1 isn't available as error code. + * Changed some variable types in client_if_modified_since() + * for the same reason. 
+ * + * Revision 1.63 2006/08/14 13:18:08 david__schmidt + * OS/2 compilation compatibility fixups + * + * Revision 1.62 2006/08/14 08:58:42 fabiankeil + * Changed include from strptime.c to strptime.h + * * Revision 1.61 2006/08/14 08:25:19 fabiankeil * Split filter-headers{} into filter-client-headers{} * and filter-server-headers{}. @@ -468,11 +570,10 @@ const char parsers_rcs[] = "$Id: parsers.c,v 1.61 2006/08/14 08:25:19 fabiankeil #include "project.h" -#ifdef OSX_DARWIN -#include +#ifdef FEATURE_PTHREAD #include "jcc.h" /* jcc.h is for mutex semapores only */ -#endif /* def OSX_DARWIN */ +#endif /* def FEATURE_PTHREAD */ #include "list.h" #include "parsers.h" #include "encode.h" @@ -626,9 +727,9 @@ jb_err add_to_iob(struct client_state *csp, char *buf, int n) if (n <= 0) return JB_ERR_OK; - used = iob->eod - iob->buf; - offset = iob->cur - iob->buf; - need = used + n + 1; + used = (size_t)(iob->eod - iob->buf); + offset = (size_t)(iob->cur - iob->buf); + need = used + (size_t)n + 1; /* * If the buffer can't hold the new data, extend it first. @@ -967,20 +1068,12 @@ jb_err filter_header(struct client_state *csp, char **header) int i, found_filters = 0; -#ifndef MAX_AF_FILES -# define MAX_AF_FILES 1 -# define INDEX_OR_NOT -#else -# define INDEX_OR_NOT [i] -#endif - - log_error(LOG_LEVEL_RE_FILTER, "Entered filter_headers"); /* * Need to check the set of re_filterfiles... */ for (i = 0; i < MAX_AF_FILES; i++) { - fl = csp->rlist INDEX_OR_NOT; + fl = csp->rlist[i]; if (NULL != fl) { if (NULL != fl->f) @@ -999,12 +1092,12 @@ jb_err filter_header(struct client_state *csp, char **header) for (i = 0; i < MAX_AF_FILES; i++) { - fl = csp->rlist INDEX_OR_NOT; + fl = csp->rlist[i]; if ((NULL == fl) || (NULL == fl->f)) break; /* * For all applying +filter actions, look if a filter by that - * name exists and if yes, execute it's pcrs_joblist on the + * name exists and if yes, execute its pcrs_joblist on the * buffer. 
*/ for (b = fl->f; b; b = b->next) @@ -1062,12 +1155,11 @@ jb_err filter_header(struct client_state *csp, char **header) if ( 0 == size ) { - log_error(LOG_LEVEL_HEADER, "Removing empty header %s", *header); + log_error(LOG_LEVEL_HEADER, "Removing empty header %s", *header); freez(*header); } - log_error(LOG_LEVEL_RE_FILTER, "Leaving filter headers"); - return(JB_ERR_OK); + return(JB_ERR_OK); } @@ -1228,8 +1320,8 @@ jb_err server_content_type(struct client_state *csp, char **header) * * Description : - Prohibit filtering (CT_TABOO) if transfer coding compresses * - Raise the CSP_FLAG_CHUNKED flag if coding is "chunked" - * - Change from "chunked" to "identity" if body was chunked - * but has been de-chunked for filtering. + * - Remove header if body was chunked but has been + * de-chunked for filtering. * * Parameters : * 1 : csp = Current client state (buffers, headers, etc...) @@ -1260,15 +1352,13 @@ jb_err server_transfer_coding(struct client_state *csp, char **header) csp->flags |= CSP_FLAG_CHUNKED; /* - * If the body was modified, it has been - * de-chunked first, so adjust the header: + * If the body was modified, it has been de-chunked first + * and the header must be removed. */ if (csp->flags & CSP_FLAG_MODIFIED) { + log_error(LOG_LEVEL_HEADER, "Removing: %s", *header); freez(*header); - *header = strdup("Transfer-Encoding: identity"); - log_error(LOG_LEVEL_HEADER, "Set: %s", *header); - return (header == NULL) ? 
JB_ERR_MEMORY : JB_ERR_OK; } } @@ -1467,6 +1557,9 @@ jb_err server_last_modified(struct client_state *csp, char **header) char buf[BUFFER_SIZE]; char newheader[50]; +#ifdef HAVE_GMTIME_R + struct tm gmt; +#endif struct tm *timeptr = NULL; time_t now, last_modified; long int rtime; @@ -1515,24 +1608,36 @@ jb_err server_last_modified(struct client_state *csp, char **header) { log_error(LOG_LEVEL_HEADER, "Randomizing: %s", *header); now = time(NULL); +#ifdef HAVE_GMTIME_R + timeptr = gmtime_r(&now, &gmt); +#elif FEATURE_PTHREAD + pthread_mutex_lock(&gmtime_mutex); + timeptr = gmtime(&now); + pthread_mutex_unlock(&gmtime_mutex); +#else timeptr = gmtime(&now); - if ( (last_modified = parse_header_time(*header, timeptr)) < 0 ) +#endif + if ((timeptr = parse_header_time(*header, &last_modified)) == NULL) { - log_error(LOG_LEVEL_HEADER, "Couldn't parse: %s (crunching!)", *header); - freez(*header); + log_error(LOG_LEVEL_HEADER, "Couldn't parse: %s (crunching!)", *header); + freez(*header); } else { - rtime = difftime(now, last_modified); + rtime = (long int)difftime(now, last_modified); if (rtime) { -#if !defined(_WIN32) && !defined(__OS2__) - rtime = random() % rtime + 1; -#else - rtime = rand() % rtime + 1; -#endif /* (ifndef _WIN32 || __OS2__) */ + rtime = pick_from_range(rtime); last_modified += rtime; - timeptr = localtime(&last_modified); +#ifdef HAVE_GMTIME_R + timeptr = gmtime_r(&last_modified, &gmt); +#elif FEATURE_PTHREAD + pthread_mutex_lock(&gmtime_mutex); + timeptr = gmtime(&last_modified); + pthread_mutex_unlock(&gmtime_mutex); +#else + timeptr = gmtime(&last_modified); +#endif strftime(newheader, sizeof(newheader), "%a, %d %b %Y %H:%M:%S GMT", timeptr); freez(*header); *header = strdup("Last-Modified: "); @@ -1540,7 +1645,7 @@ jb_err server_last_modified(struct client_state *csp, char **header) if (*header == NULL) { - log_error(LOG_LEVEL_ERROR, " Insufficent memory, header crunched without replacement."); + log_error(LOG_LEVEL_ERROR, "Insufficent memory, 
header crunched without replacement."); return JB_ERR_MEMORY; } @@ -1664,7 +1769,7 @@ jb_err client_referrer(struct client_state *csp, char **header) const char *newval; const char *host; char *referer; - int hostlenght; + size_t hostlenght; #ifdef FEATURE_FORCE_LOAD /* Since the referrer can include the prefix even @@ -1722,7 +1827,7 @@ jb_err client_referrer(struct client_state *csp, char **header) *if www.example.org/www.example.com-shall-see-the-referer/ *links to www.example.com/ */ - referer[hostlenght+17] = '\n'; + referer[hostlenght+17] = '\0'; } if ( 0 == strstr(referer, host)) /*Host has changed*/ { @@ -2213,11 +2318,14 @@ jb_err client_host(struct client_state *csp, char **header) jb_err client_if_modified_since(struct client_state *csp, char **header) { char newheader[50]; +#ifdef HAVE_GMTIME_R + struct tm gmt; +#endif struct tm *timeptr = NULL; time_t tm = 0; const char *newval; - time_t rtime; - time_t hours, minutes, seconds; + long int rtime; + long int hours, minutes, seconds; int negative = 0; char * endptr; @@ -2244,36 +2352,41 @@ jb_err client_if_modified_since(struct client_state *csp, char **header) } else /* add random value */ { - if ( (tm = parse_header_time(*header, timeptr)) < 0 ) + if ((timeptr = parse_header_time(*header, &tm)) == NULL) { log_error(LOG_LEVEL_HEADER, "Couldn't parse: %s (crunching!)", *header); freez(*header); } else { - rtime = (time_t) strtol(newval, &endptr, 0); + rtime = strtol(newval, &endptr, 0); if(rtime) { - log_error(LOG_LEVEL_HEADER, "Randomizing: %s (random range: %d hou%s)", - *header, rtime, (rtime == 1 || rtime == -1) ? "r": "rs"); - rtime *= 3600; -#if !defined(_WIN32) && !defined(__OS2__) - rtime = random() % rtime + 1; -#else - rtime = rand() % rtime + 1; -#endif /* (_WIN32 || __OS2__) */ - if(newval[0] == '-') + log_error(LOG_LEVEL_HEADER, "Randomizing: %s (random range: %d minut%s)", + *header, rtime, (rtime == 1 || rtime == -1) ? 
"e": "es"); + if(rtime < 0) { - rtime *= -1; + rtime *= -1; + negative = 1; } + rtime *= 60; + rtime = pick_from_range(rtime); } else { log_error(LOG_LEVEL_ERROR, "Random range is 0. Assuming time transformation test.", *header); } - tm += rtime; - timeptr = localtime(&tm); + tm += rtime * (negative ? -1 : 1); +#ifdef HAVE_GMTIME_R + timeptr = gmtime_r(&tm, &gmt); +#elif FEATURE_PTHREAD + pthread_mutex_lock(&gmtime_mutex); + timeptr = gmtime(&tm); + pthread_mutex_unlock(&gmtime_mutex); +#else + timeptr = gmtime(&tm); +#endif strftime(newheader, sizeof(newheader), "%a, %d %b %Y %H:%M:%S GMT", timeptr); freez(*header); @@ -2282,18 +2395,13 @@ jb_err client_if_modified_since(struct client_state *csp, char **header) if (*header == NULL) { - log_error(LOG_LEVEL_HEADER, " Insufficent memory, header crunched without replacement."); + log_error(LOG_LEVEL_HEADER, "Insufficent memory, header crunched without replacement."); return JB_ERR_MEMORY; } if(LOG_LEVEL_HEADER & debug) /* Save cycles if the user isn't interested. 
*/ { - if(rtime < 0) - { - rtime *= -1; - negative = 1; - } - hours = rtime / 3600 % 24; + hours = rtime / 3600; minutes = rtime / 60 % 60; seconds = rtime % 60; @@ -2357,17 +2465,26 @@ jb_err client_x_filter(struct client_state *csp, char **header) { if ( 0 == strcmpic(*header, "X-Filter: No")) { - if (csp->action->flags & ACTION_FORCE_TEXT_MODE) + if (!(csp->config->feature_flags & RUNTIME_FEATURE_HTTP_TOGGLE)) { - log_error(LOG_LEVEL_HEADER, "force-text-mode overruled the client's request to disable filtering!"); + log_error(LOG_LEVEL_INFO, "Ignored the client's request to fetch without filtering."); } else - { - csp->content_type = CT_TABOO; - log_error(LOG_LEVEL_HEADER, "Disabled filter mode on behalf of the client."); + { + if (csp->action->flags & ACTION_FORCE_TEXT_MODE) + { + log_error(LOG_LEVEL_HEADER, "force-text-mode overruled the client's request to fetch without filtering!"); + } + else + { + csp->content_type = CT_TABOO; + csp->action->flags &= ~ACTION_FILTER_SERVER_HEADERS; + csp->action->flags &= ~ACTION_FILTER_CLIENT_HEADERS; + log_error(LOG_LEVEL_HEADER, "Accepted the client's request to fetch without filtering."); + } + log_error(LOG_LEVEL_HEADER, "Crunching %s", *header); + freez(*header); } - log_error(LOG_LEVEL_HEADER, "Crunching %s", *header); - freez(*header); } return JB_ERR_OK; } @@ -2692,7 +2809,7 @@ jb_err server_set_cookie(struct client_state *csp, char **header) time (&now); #ifdef HAVE_LOCALTIME_R tm_now = *localtime_r(&now, &tm_now); -#elif OSX_DARWIN +#elif FEATURE_PTHREAD pthread_mutex_lock(&localtime_mutex); tm_now = *localtime (&now); pthread_mutex_unlock(&localtime_mutex); @@ -2801,9 +2918,12 @@ jb_err server_set_cookie(struct client_state *csp, char **header) *********************************************************************/ int strclean(const char *string, const char *substring) { - int hits = 0, len = strlen(substring); + int hits = 0; + size_t len; char *pos, *p; + len = strlen(substring); + while((pos = strstr(string, 
substring)) != NULL) { p = pos + len; @@ -2829,26 +2949,131 @@ int strclean(const char *string, const char *substring) * * Parameters : * 1 : header = header to parse - * 2 : timeptr = storage for the resulting time structure + * 2 : tm = storage for the resulting time in seconds * - * Returns : Time in seconds since Unix epoch or -1 for failure. + * Returns : Time struct containing the header time, or + * NULL in case of parsing problems. * *********************************************************************/ -time_t parse_header_time(char *header, struct tm *timeptr) { +struct tm *parse_header_time(char *header, time_t *tm) { char * timestring; - time_t tm; + struct tm gmt; + struct tm * timeptr; + + /* + * Initializing gmt to prevent time zone offsets. + * + * While this is only necessary on some platforms + * (mingw32 for example), I don't know how to + * detect these automatically and doing it everywhere + * shouldn't hurt. + */ + time(tm); +#ifdef HAVE_LOCALTIME_R + gmt = *localtime_r(tm, &gmt); +#elif FEATURE_PTHREAD + pthread_mutex_lock(&localtime_mutex); + gmt = *localtime(tm); + pthread_mutex_unlock(&localtime_mutex); +#else + gmt = *localtime(tm); +#endif - tm = time(NULL); - timeptr = localtime(&tm); /* Skipping header name */ timestring = strstr(header, ": "); - if (strptime(timestring, ": %a, %d %b %Y %H:%M:%S", timeptr) == NULL) + if (strptime(timestring, ": %a, %d %b %Y %H:%M:%S", &gmt) == NULL) { - return(-1); + timeptr = NULL; + } + else + { + *tm = timegm(&gmt); + timeptr = &gmt; } - tm = mktime(timeptr); - return(tm); + return(timeptr); + +} + +/********************************************************************* + * + * Function : get_destination_from_headers + * + * Description : Parse the "Host:" header to get the request's destination. + * Only needed if the client's request was forcefully + * redirected into Privoxy. + * + * Code mainly copied from client_host() which is currently + * run too late for this purpose. 
+ * + * Parameters : + * 1 : headers = List of headers (one of them hopefully being + * the "Host:" header) + * 2 : http = storage for the result (host, port and hostport). + * + * Returns : JB_ERR_MEMORY in case of memory problems, + * JB_ERR_PARSE if the host header couldn't be found, + * JB_ERR_OK otherwise. + * + *********************************************************************/ +jb_err get_destination_from_headers(const struct list *headers, struct http_request *http) +{ + char *q; + char *p; + char *host; + + host = get_header_value(headers, "Host:"); + + if (NULL == host) + { + log_error(LOG_LEVEL_ERROR, "No \"Host:\" header found."); + return JB_ERR_PARSE; + } + + if (NULL == (p = strdup((host)))) + { + log_error(LOG_LEVEL_ERROR, "Out of memory while parsing \"Host:\" header"); + return JB_ERR_MEMORY; + } + chomp(p); + if (NULL == (q = strdup(p))) + { + freez(p); + log_error(LOG_LEVEL_ERROR, "Out of memory while parsing \"Host:\" header"); + return JB_ERR_MEMORY; + } + + freez(http->hostport); + http->hostport = p; + freez(http->host); + http->host = q; + q = strchr(http->host, ':'); + if (q != NULL) + { + /* Terminate hostname and evaluate port string */ + *q++ = '\0'; + http->port = atoi(q); + } + else + { + http->port = http->ssl ? 443 : 80; + } + + /* Rebuild request URL */ + freez(http->url); + http->url = strdup(http->ssl ? "https://" : "http://"); + string_append(&http->url, http->hostport); + string_append(&http->url, http->path); + if (http->url == NULL) + { + return JB_ERR_MEMORY; + } + + log_error(LOG_LEVEL_HEADER, "Destination extracted from \"Host:\" header. New request URL: %s", + http->url); + + return JB_ERR_OK; + }