-const char parsers_rcs[] = "$Id: parsers.c,v 1.62 2006/08/14 08:58:42 fabiankeil Exp $";
+const char parsers_rcs[] = "$Id: parsers.c,v 1.78 2006/12/26 17:19:20 fabiankeil Exp $";
/*********************************************************************
*
* File : $Source: /cvsroot/ijbswa/current/parsers.c,v $
* `client_uagent', `client_x_forwarded',
* `client_x_forwarded_adder', `client_xtra_adder',
* `content_type', `crumble', `destroy_list', `enlist',
- * `flush_socket', ``get_header', `sed',
- * and `server_set_cookie'.
- *
- * Copyright : Written by and Copyright (C) 2001 the SourceForge
+ * `flush_socket', `get_header', `sed', `filter_server_header',
+ * `filter_client_header', `filter_header', `crunch_server_header',
+ * `server_content_encoding', `server_content_disposition',
+ * `server_last_modified', `client_accept_language',
+ * `crunch_client_header', `client_if_modified_since',
+ * `client_if_none_match', `get_destination_from_headers',
+ * `parse_header_time' and `server_set_cookie'.
+ *
+ * Copyright : Written by and Copyright (C) 2001-2006 the SourceForge
* Privoxy team. http://www.privoxy.org/
*
* Based on the Internet Junkbuster originally written
*
* Revisions :
* $Log: parsers.c,v $
+ * Revision 1.78 2006/12/26 17:19:20 fabiankeil
+ * Bringing back the "useless" localtime() call
+ * I removed in revision 1.67. On some platforms
+ * it's necessary to prevent time zone offsets.
+ *
+ * Revision 1.77 2006/12/07 18:44:26 fabiankeil
+ * Rebuild request URL in get_destination_from_headers()
+ * to make sure redirect{pcrs command} works as expected
+ * for intercepted requests.
+ *
+ * Revision 1.76 2006/12/06 19:52:25 fabiankeil
+ * Added get_destination_from_headers().
+ *
+ * Revision 1.75 2006/11/13 19:05:51 fabiankeil
+ * Make pthread mutex locking more generic. Instead of
+ * checking for OSX and OpenBSD, check for FEATURE_PTHREAD
+ * and use mutex locking unless there is an _r function
+ * available. Better safe than sorry.
+ *
+ * Fixes "./configure --disable-pthread" and should result
+ * in less threading-related problems on pthread-using platforms,
+ * but it still doesn't fix BR#1122404.
+ *
+ * Revision 1.74 2006/10/02 16:59:12 fabiankeil
+ * The special header "X-Filter: No" now disables
+ * header filtering as well.
+ *
+ * Revision 1.73 2006/09/23 13:26:38 roro
+ * Replace TABs by spaces in source code.
+ *
+ * Revision 1.72 2006/09/23 12:37:21 fabiankeil
+ * Don't print a log message every time filter_headers is
+ * entered or left. It only creates noise without any real
+ * information.
+ *
+ * Revision 1.71 2006/09/21 19:55:17 fabiankeil
+ * Fix +hide-if-modified-since{-n}.
+ *
+ * Revision 1.70 2006/09/08 12:06:34 fabiankeil
+ * Have hide-if-modified-since interpret the random
+ * range value as minutes instead of hours. Allows
+ * more fine-grained configuration.
+ *
+ * Revision 1.69 2006/09/06 16:25:51 fabiankeil
+ * Always have parse_header_time return a pointer
+ * that actually makes sense, even though we currently
+ * only need it to detect problems.
+ *
+ * Revision 1.68 2006/09/06 10:43:32 fabiankeil
+ * Added config option enable-remote-http-toggle
+ * to specify if Privoxy should recognize special
+ * headers (currently only X-Filter) to change its
+ * behaviour. Disabled by default.
+ *
+ * Revision 1.67 2006/09/04 11:01:26 fabiankeil
+ * After filtering de-chunked instances, remove
+ * "Transfer-Encoding" header entirely instead of changing
+ * it to "Transfer-Encoding: identity", which is invalid.
+ * Thanks Michael Shields <shields@msrl.com>. Fixes PR 1318658.
+ *
+ * Don't use localtime in parse_header_time. An empty time struct
+ * is good enough, it gets overwritten by strptime anyway.
+ *
+ * Revision 1.66 2006/09/03 19:38:28 fabiankeil
+ * Use gmtime_r if available, fallback to gmtime with mutex
+ * protection for MacOSX and use vanilla gmtime for the rest.
+ *
+ * Revision 1.65 2006/08/22 10:55:56 fabiankeil
+ * Changed client_referrer to use the right type (size_t) for
+ * hostlenght and to shorten the temporary referrer string with
+ * '\0' instead of adding a useless line break.
+ *
+ * Revision 1.64 2006/08/17 17:15:10 fabiankeil
+ * - Back to timegm() using GnuPG's replacement if necessary.
+ * Using mktime() and localtime() could add a one hour offset if
+ * the randomize factor was big enough to lead to a summer/wintertime
+ * switch.
+ *
+ * - Removed now-useless Privoxy 3.0.3 compatibility glue.
+ *
+ * - Moved randomization code into pick_from_range().
+ *
+ * - Changed parse_header_time definition.
+ * time_t isn't guaranteed to be signed and
+ * if it isn't, -1 isn't available as error code.
+ * Changed some variable types in client_if_modified_since()
+ * because of the same reason.
+ *
+ * Revision 1.63 2006/08/14 13:18:08 david__schmidt
+ * OS/2 compilation compatibility fixups
+ *
* Revision 1.62 2006/08/14 08:58:42 fabiankeil
* Changed include from strptime.c to strptime.h
*
#include "project.h"
-#ifdef OSX_DARWIN
-#include <pthread.h>
+#ifdef FEATURE_PTHREAD
#include "jcc.h"
/* jcc.h is for mutex semapores only */
-#endif /* def OSX_DARWIN */
+#endif /* def FEATURE_PTHREAD */
#include "list.h"
#include "parsers.h"
#include "encode.h"
* or buffer limit reached.
*
*********************************************************************/
-jb_err add_to_iob(struct client_state *csp, char *buf, int n)
+jb_err add_to_iob(struct client_state *csp, char *buf, size_t n)
{
struct iob *iob = csp->iob;
size_t used, offset, need, want;
if (n <= 0) return JB_ERR_OK;
- used = iob->eod - iob->buf;
- offset = iob->cur - iob->buf;
+ used = (size_t)(iob->eod - iob->buf);
+ offset = (size_t)(iob->cur - iob->buf);
need = used + n + 1;
/*
int i, found_filters = 0;
-#ifndef MAX_AF_FILES
-# define MAX_AF_FILES 1
-# define INDEX_OR_NOT
-#else
-# define INDEX_OR_NOT [i]
-#endif
-
- log_error(LOG_LEVEL_RE_FILTER, "Entered filter_headers");
/*
* Need to check the set of re_filterfiles...
*/
for (i = 0; i < MAX_AF_FILES; i++)
{
- fl = csp->rlist INDEX_OR_NOT;
+ fl = csp->rlist[i];
if (NULL != fl)
{
if (NULL != fl->f)
for (i = 0; i < MAX_AF_FILES; i++)
{
- fl = csp->rlist INDEX_OR_NOT;
+ fl = csp->rlist[i];
if ((NULL == fl) || (NULL == fl->f))
break;
/*
* For all applying +filter actions, look if a filter by that
- * name exists and if yes, execute it's pcrs_joblist on the
+ * name exists and if yes, execute its pcrs_joblist on the
* buffer.
*/
for (b = fl->f; b; b = b->next)
if ( 0 == size )
{
- log_error(LOG_LEVEL_HEADER, "Removing empty header %s", *header);
+ log_error(LOG_LEVEL_HEADER, "Removing empty header %s", *header);
freez(*header);
}
- log_error(LOG_LEVEL_RE_FILTER, "Leaving filter headers");
- return(JB_ERR_OK);
+ return(JB_ERR_OK);
}
*
* Description : - Prohibit filtering (CT_TABOO) if transfer coding compresses
* - Raise the CSP_FLAG_CHUNKED flag if coding is "chunked"
- * - Change from "chunked" to "identity" if body was chunked
- * but has been de-chunked for filtering.
+ * - Remove header if body was chunked but has been
+ * de-chunked for filtering.
*
* Parameters :
* 1 : csp = Current client state (buffers, headers, etc...)
csp->flags |= CSP_FLAG_CHUNKED;
/*
- * If the body was modified, it has been
- * de-chunked first, so adjust the header:
+ * If the body was modified, it has been de-chunked first
+ * and the header must be removed.
*/
if (csp->flags & CSP_FLAG_MODIFIED)
{
+ log_error(LOG_LEVEL_HEADER, "Removing: %s", *header);
freez(*header);
- *header = strdup("Transfer-Encoding: identity");
- log_error(LOG_LEVEL_HEADER, "Set: %s", *header);
- return (header == NULL) ? JB_ERR_MEMORY : JB_ERR_OK;
}
}
char buf[BUFFER_SIZE];
char newheader[50];
+#ifdef HAVE_GMTIME_R
+ struct tm gmt;
+#endif
struct tm *timeptr = NULL;
time_t now, last_modified;
long int rtime;
{
log_error(LOG_LEVEL_HEADER, "Randomizing: %s", *header);
now = time(NULL);
+#ifdef HAVE_GMTIME_R
+ timeptr = gmtime_r(&now, &gmt);
+#elif FEATURE_PTHREAD
+ pthread_mutex_lock(&gmtime_mutex);
+ timeptr = gmtime(&now);
+ pthread_mutex_unlock(&gmtime_mutex);
+#else
timeptr = gmtime(&now);
- if ( (last_modified = parse_header_time(*header, timeptr)) < 0 )
+#endif
+ if ((timeptr = parse_header_time(*header, &last_modified)) == NULL)
{
- log_error(LOG_LEVEL_HEADER, "Couldn't parse: %s (crunching!)", *header);
- freez(*header);
+ log_error(LOG_LEVEL_HEADER, "Couldn't parse: %s (crunching!)", *header);
+ freez(*header);
}
else
{
- rtime = difftime(now, last_modified);
+ rtime = (long int)difftime(now, last_modified);
if (rtime)
{
-#if !defined(_WIN32) && !defined(__OS2__)
- rtime = random() % rtime + 1;
-#else
- rtime = rand() % (long int)(rtime + 1);
-#endif /* (ifndef _WIN32 || __OS2__) */
+ rtime = pick_from_range(rtime);
last_modified += rtime;
- timeptr = localtime(&last_modified);
+#ifdef HAVE_GMTIME_R
+ timeptr = gmtime_r(&last_modified, &gmt);
+#elif FEATURE_PTHREAD
+ pthread_mutex_lock(&gmtime_mutex);
+ timeptr = gmtime(&last_modified);
+ pthread_mutex_unlock(&gmtime_mutex);
+#else
+ timeptr = gmtime(&last_modified);
+#endif
strftime(newheader, sizeof(newheader), "%a, %d %b %Y %H:%M:%S GMT", timeptr);
freez(*header);
*header = strdup("Last-Modified: ");
if (*header == NULL)
{
- log_error(LOG_LEVEL_ERROR, " Insufficent memory, header crunched without replacement.");
+ log_error(LOG_LEVEL_ERROR, "Insufficent memory, header crunched without replacement.");
return JB_ERR_MEMORY;
}
const char *newval;
const char *host;
char *referer;
- int hostlenght;
+ size_t hostlenght;
#ifdef FEATURE_FORCE_LOAD
/* Since the referrer can include the prefix even
*if www.example.org/www.example.com-shall-see-the-referer/
*links to www.example.com/
*/
- referer[hostlenght+17] = '\n';
+ referer[hostlenght+17] = '\0';
}
if ( 0 == strstr(referer, host)) /*Host has changed*/
{
jb_err client_if_modified_since(struct client_state *csp, char **header)
{
char newheader[50];
+#ifdef HAVE_GMTIME_R
+ struct tm gmt;
+#endif
struct tm *timeptr = NULL;
time_t tm = 0;
const char *newval;
- time_t rtime;
- time_t hours, minutes, seconds;
+ long int rtime;
+ long int hours, minutes, seconds;
int negative = 0;
char * endptr;
}
else /* add random value */
{
- if ( (tm = parse_header_time(*header, timeptr)) < 0 )
+ if ((timeptr = parse_header_time(*header, &tm)) == NULL)
{
log_error(LOG_LEVEL_HEADER, "Couldn't parse: %s (crunching!)", *header);
freez(*header);
}
else
{
- rtime = (time_t) strtol(newval, &endptr, 0);
+ rtime = strtol(newval, &endptr, 0);
if(rtime)
{
- log_error(LOG_LEVEL_HEADER, "Randomizing: %s (random range: %d hou%s)",
- *header, rtime, (rtime == 1 || rtime == -1) ? "r": "rs");
- rtime *= 3600;
-#if !defined(_WIN32) && !defined(__OS2__)
- rtime = random() % rtime + 1;
-#else
- rtime = rand() % (long int)(rtime + 1);
-#endif /* (_WIN32 || __OS2__) */
- if(newval[0] == '-')
+ log_error(LOG_LEVEL_HEADER, "Randomizing: %s (random range: %d minut%s)",
+ *header, rtime, (rtime == 1 || rtime == -1) ? "e": "es");
+ if(rtime < 0)
{
- rtime *= -1;
+ rtime *= -1;
+ negative = 1;
}
+ rtime *= 60;
+ rtime = pick_from_range(rtime);
}
else
{
log_error(LOG_LEVEL_ERROR, "Random range is 0. Assuming time transformation test.",
*header);
}
- tm += rtime;
- timeptr = localtime(&tm);
+ tm += rtime * (negative ? -1 : 1);
+#ifdef HAVE_GMTIME_R
+ timeptr = gmtime_r(&tm, &gmt);
+#elif FEATURE_PTHREAD
+ pthread_mutex_lock(&gmtime_mutex);
+ timeptr = gmtime(&tm);
+ pthread_mutex_unlock(&gmtime_mutex);
+#else
+ timeptr = gmtime(&tm);
+#endif
strftime(newheader, sizeof(newheader), "%a, %d %b %Y %H:%M:%S GMT", timeptr);
freez(*header);
if (*header == NULL)
{
- log_error(LOG_LEVEL_HEADER, " Insufficent memory, header crunched without replacement.");
+ log_error(LOG_LEVEL_HEADER, "Insufficent memory, header crunched without replacement.");
return JB_ERR_MEMORY;
}
if(LOG_LEVEL_HEADER & debug) /* Save cycles if the user isn't interested. */
{
- if(rtime < 0)
- {
- rtime *= -1;
- negative = 1;
- }
- hours = (int)rtime / 3600 % 24;
- minutes = (int)rtime / 60 % 60;
- seconds = (int)rtime % 60;
+ hours = rtime / 3600;
+ minutes = rtime / 60 % 60;
+ seconds = rtime % 60;
log_error(LOG_LEVEL_HEADER, "Randomized: %s (%s %d hou%s %d minut%s %d second%s",
*header, (negative) ? "subtracted" : "added", hours, (hours == 1) ? "r" : "rs",
{
if ( 0 == strcmpic(*header, "X-Filter: No"))
{
- if (csp->action->flags & ACTION_FORCE_TEXT_MODE)
+ if (!(csp->config->feature_flags & RUNTIME_FEATURE_HTTP_TOGGLE))
{
- log_error(LOG_LEVEL_HEADER, "force-text-mode overruled the client's request to disable filtering!");
+ log_error(LOG_LEVEL_INFO, "Ignored the client's request to fetch without filtering.");
}
else
- {
- csp->content_type = CT_TABOO;
- log_error(LOG_LEVEL_HEADER, "Disabled filter mode on behalf of the client.");
+ {
+ if (csp->action->flags & ACTION_FORCE_TEXT_MODE)
+ {
+ log_error(LOG_LEVEL_HEADER, "force-text-mode overruled the client's request to fetch without filtering!");
+ }
+ else
+ {
+ csp->content_type = CT_TABOO;
+ csp->action->flags &= ~ACTION_FILTER_SERVER_HEADERS;
+ csp->action->flags &= ~ACTION_FILTER_CLIENT_HEADERS;
+ log_error(LOG_LEVEL_HEADER, "Accepted the client's request to fetch without filtering.");
+ }
+ log_error(LOG_LEVEL_HEADER, "Crunching %s", *header);
+ freez(*header);
}
- log_error(LOG_LEVEL_HEADER, "Crunching %s", *header);
- freez(*header);
}
return JB_ERR_OK;
}
time (&now);
#ifdef HAVE_LOCALTIME_R
tm_now = *localtime_r(&now, &tm_now);
-#elif OSX_DARWIN
+#elif FEATURE_PTHREAD
pthread_mutex_lock(&localtime_mutex);
tm_now = *localtime (&now);
pthread_mutex_unlock(&localtime_mutex);
*********************************************************************/
int strclean(const char *string, const char *substring)
{
- int hits = 0, len = strlen(substring);
+ int hits = 0;
+ size_t len;
char *pos, *p;
+ len = strlen(substring);
+
while((pos = strstr(string, substring)) != NULL)
{
p = pos + len;
*
* Parameters :
* 1 : header = header to parse
- * 2 : timeptr = storage for the resulting time structure
+ * 2 : tm = storage for the resulting time in seconds
*
- * Returns : Time in seconds since Unix epoch or -1 for failure.
+ * Returns : Time struct containing the header time, or
+ * NULL in case of parsing problems.
*
*********************************************************************/
-time_t parse_header_time(char *header, struct tm *timeptr) {
+struct tm *parse_header_time(char *header, time_t *tm) {
char * timestring;
- time_t tm;
+ struct tm gmt;
+ struct tm * timeptr;
+
+ /*
+ * Initializing gmt to prevent time zone offsets.
+ *
+ * While this is only necessary on some platforms
+ * (mingw32 for example), I don't know how to
+ * detect these automatically and doing it everywhere
+ * shouldn't hurt.
+ *
+ * Side effect: *tm is set to the current time here and
+ * keeps that value if parsing fails below.
+ */
+ time(tm);
+#ifdef HAVE_LOCALTIME_R
+ gmt = *localtime_r(tm, &gmt);
+#elif FEATURE_PTHREAD
+ pthread_mutex_lock(&localtime_mutex);
+ gmt = *localtime(tm);
+ pthread_mutex_unlock(&localtime_mutex);
+#else
+ gmt = *localtime(tm);
+#endif
- tm = time(NULL);
- timeptr = localtime(&tm);
/* Skipping header name */
timestring = strstr(header, ": ");
- if (strptime(timestring, ": %a, %d %b %Y %H:%M:%S", timeptr) == NULL)
+ /*
+ * strstr() returns NULL for a header without ": ". Treat
+ * that as a parse failure instead of passing NULL on to
+ * strptime().
+ */
+ if ((timestring == NULL)
+ || (strptime(timestring, ": %a, %d %b %Y %H:%M:%S", &gmt) == NULL))
{
- return(-1);
+ timeptr = NULL;
+ }
+ else
+ {
+ /* Header times are GMT, so convert without time zone handling. */
+ *tm = timegm(&gmt);
+ /*
+ * NOTE(review): gmt is a local variable, so this pointer is
+ * dangling once we return. It is only safe as a "not NULL"
+ * success indicator and must never be dereferenced by the caller.
+ */
+ timeptr = &gmt;
}
- tm = mktime(timeptr);
- return(tm);
+ return(timeptr);
+
+}
+
+/*********************************************************************
+ *
+ * Function    :  get_destination_from_headers
+ *
+ * Description :  Parse the "Host:" header to get the request's destination.
+ *                Only needed if the client's request was forcefully
+ *                redirected into Privoxy.
+ *
+ *                Code mainly copied from client_host() which is currently
+ *                run too late for this purpose.
+ *
+ *                The port is validated before http is modified, so
+ *                a JB_ERR_PARSE return leaves http untouched. An empty
+ *                port ("example.org:") selects the default port.
+ *
+ * Parameters  :
+ *          1  :  headers = List of headers (one of them hopefully being
+ *                the "Host:" header)
+ *          2  :  http = storage for the result (host, port, hostport
+ *                and the rebuilt url). http->ssl and http->path are
+ *                read to choose the default port and to rebuild the URL.
+ *
+ * Returns     :  JB_ERR_MEMORY in case of memory problems,
+ *                JB_ERR_PARSE if the host header couldn't be found
+ *                or contains an invalid port,
+ *                JB_ERR_OK otherwise.
+ *
+ *********************************************************************/
+jb_err get_destination_from_headers(const struct list *headers, struct http_request *http)
+{
+   char *host;
+   char *hostport;
+   char *hostname;
+   char *port_string;
+   char *port_end;
+   long port;
+
+   host = get_header_value(headers, "Host:");
+
+   if (NULL == host)
+   {
+      log_error(LOG_LEVEL_ERROR, "No \"Host:\" header found.");
+      return JB_ERR_PARSE;
+   }
+
+   /* Work on private copies; the header list stays untouched. */
+   if (NULL == (hostport = strdup(host)))
+   {
+      log_error(LOG_LEVEL_ERROR, "Out of memory while parsing \"Host:\" header");
+      return JB_ERR_MEMORY;
+   }
+   chomp(hostport);
+   if (NULL == (hostname = strdup(hostport)))
+   {
+      freez(hostport);
+      log_error(LOG_LEVEL_ERROR, "Out of memory while parsing \"Host:\" header");
+      return JB_ERR_MEMORY;
+   }
+
+   /* Split "host:port" in the hostname copy; hostport keeps both parts. */
+   port_string = strchr(hostname, ':');
+   if (port_string != NULL)
+   {
+      /* Terminate hostname; port_string now points at the port digits */
+      *port_string++ = '\0';
+   }
+
+   if ((port_string == NULL) || (*port_string == '\0'))
+   {
+      port = http->ssl ? 443 : 80;
+   }
+   else
+   {
+      /*
+       * The header is client-controlled: reject non-numeric,
+       * trailing-garbage and out-of-range ports. strtol() saturates
+       * on overflow, which the range check catches as well.
+       */
+      port = strtol(port_string, &port_end, 10);
+      if ((port_end == port_string) || (*port_end != '\0')
+         || (port < 1) || (port > 65535))
+      {
+         log_error(LOG_LEVEL_ERROR, "Invalid port in \"Host:\" header: %s", hostport);
+         freez(hostname);
+         freez(hostport);
+         return JB_ERR_PARSE;
+      }
+   }
+
+   /* Everything parsed; hand ownership of the copies to http. */
+   freez(http->hostport);
+   http->hostport = hostport;
+   freez(http->host);
+   http->host = hostname;
+   http->port = (int)port;
+
+   /* Rebuild request URL */
+   freez(http->url);
+   http->url = strdup(http->ssl ? "https://" : "http://");
+   /*
+    * NOTE(review): relies on string_append() coping with a NULL
+    * target and leaving http->url NULL on failure -- confirm.
+    */
+   string_append(&http->url, http->hostport);
+   string_append(&http->url, http->path);
+   if (http->url == NULL)
+   {
+      return JB_ERR_MEMORY;
+   }
+
+   log_error(LOG_LEVEL_HEADER, "Destination extracted from \"Host:\" header. New request URL: %s",
+      http->url);
+
+   return JB_ERR_OK;
+
}