-const char parsers_rcs[] = "$Id: parsers.c,v 1.230 2011/09/04 11:32:20 fabiankeil Exp $";
+const char parsers_rcs[] = "$Id: parsers.c,v 1.249 2012/09/04 08:33:17 fabiankeil Exp $";
/*********************************************************************
*
* File : $Source: /cvsroot/ijbswa/current/parsers.c,v $
*
* Purpose : Declares functions to parse/crunch headers and pages.
- * Functions declared include:
- * `add_to_iob', `client_cookie_adder', `client_from',
- * `client_referrer', `client_send_cookie', `client_ua',
- * `client_uagent', `client_x_forwarded',
- * `client_x_forwarded_adder', `client_xtra_adder',
- * `content_type', `crumble', `destroy_list', `enlist',
- * `flush_socket', ``get_header', `sed', `filter_header'
- * `server_content_encoding', `server_content_disposition',
- * `server_last_modified', `client_accept_language',
- * `crunch_client_header', `client_if_modified_since',
- * `client_if_none_match', `get_destination_from_headers',
- * `parse_header_time', `decompress_iob' and `server_set_cookie'.
- *
- * Copyright : Written by and Copyright (C) 2001-2009 the
+ *
+ * Copyright : Written by and Copyright (C) 2001-2012 the
* Privoxy team. http://www.privoxy.org/
*
* Based on the Internet Junkbuster originally written
const char parsers_h_rcs[] = PARSERS_H_VERSION;
-/* Fix a problem with Solaris. There should be no effect on other
- * platforms.
- * Solaris's isspace() is a macro which uses its argument directly
- * as an array index. Therefore we need to make sure that high-bit
- * characters generate +ve values, and ideally we also want to make
- * the argument match the declared parameter type of "int".
- *
- * Why did they write a character function that can't take a simple
- * "char" argument? Doh!
- */
-#define ijb_isupper(__X) isupper((int)(unsigned char)(__X))
-#define ijb_tolower(__X) tolower((int)(unsigned char)(__X))
-
static char *get_header_line(struct iob *iob);
static jb_err scan_headers(struct client_state *csp);
static jb_err header_tagger(struct client_state *csp, char *header);
client_host_adder,
client_x_forwarded_for_adder,
client_xtra_adder,
- /* Temporarily disabled: client_accept_encoding_adder, */
client_connection_header_adder,
NULL
};
while (*p != '\0')
{
- if (ijb_isspace(*p) && ijb_isspace(*(p+1)))
+ if (privoxy_isspace(*p) && privoxy_isspace(*(p+1)))
{
char *q = p+1;
- while (ijb_isspace(*q))
+ while (privoxy_isspace(*q))
{
q++;
}
}
p = strchr(header, ':');
- if ((p != NULL) && (p != header) && ijb_isspace(*(p-1)))
+ if ((p != NULL) && (p != header) && privoxy_isspace(*(p-1)))
{
/*
* There's still space before the colon.
* Found: return pointer to start of value
*/
ret = cur_entry->str + length;
- while (*ret && ijb_isspace(*ret)) ret++;
+ while (*ret && privoxy_isspace(*ret)) ret++;
return ret;
}
}
}
+/*********************************************************************
+ *
+ * Function : enforce_header_order
+ *
+ * Description : Enforces a given header order.
+ *
+ * Parameters :
+ * 1 : headers = List of headers to order.
+ * 2 : ordered_headers = List of ordered header names.
+ *
+ * Returns : N/A
+ *
+ *********************************************************************/
+static void enforce_header_order(struct list *headers, const struct list *ordered_headers)
+{
+ struct list_entry *sorted_header;
+ struct list new_headers[1];
+ struct list_entry *header;
+
+ init_list(new_headers);
+
+ /* The request line is always the first "header" */
+
+ assert(NULL != headers->first->str);
+ enlist(new_headers, headers->first->str);
+ freez(headers->first->str)
+
+ /* Enlist the specified headers in the given order */
+
+ for (sorted_header = ordered_headers->first; sorted_header != NULL;
+ sorted_header = sorted_header->next)
+ {
+ const size_t sorted_header_length = strlen(sorted_header->str);
+ for (header = headers->first; header != NULL; header = header->next)
+ {
+ /* Header enlisted in previous run? -> ignore */
+ if (header->str == NULL) continue;
+
+ if (0 == strncmpic(sorted_header->str, header->str, sorted_header_length)
+ && (header->str[sorted_header_length] == ':'))
+ {
+ log_error(LOG_LEVEL_HEADER, "Enlisting sorted header %s", header->str);
+ if (JB_ERR_OK != enlist(new_headers, header->str))
+ {
+ log_error(LOG_LEVEL_HEADER, "Failed to enlist %s", header->str);
+ }
+ freez(header->str);
+ }
+ }
+ }
+
+ /* Enlist the rest of the headers behind the ordered ones */
+ for (header = headers->first; header != NULL; header = header->next)
+ {
+ /* Header enlisted in previous run? -> ignore */
+ if (header->str == NULL) continue;
+
+ log_error(LOG_LEVEL_HEADER,
+ "Enlisting left-over header %s", header->str);
+ if (JB_ERR_OK != enlist(new_headers, header->str))
+ {
+ log_error(LOG_LEVEL_HEADER, "Failed to enlist %s", header->str);
+ }
+ freez(header->str);
+ }
+
+ list_remove_all(headers);
+ list_duplicate(headers, new_headers);
+ list_remove_all(new_headers);
+
+ return;
+}
+
/*********************************************************************
*
* Function : sed
f++;
}
+ if (!filter_server_headers && !list_is_empty(csp->config->ordered_client_headers))
+ {
+ enforce_header_order(csp->headers, csp->config->ordered_client_headers);
+ }
+
return err;
}
for (job = joblist; NULL != job; job = job->next)
{
matches = pcrs_execute(job, *header, size, &newheader, &size);
- if ( 0 < matches )
+ if (0 < matches)
{
current_hits += matches;
log_error(LOG_LEVEL_HEADER, "Transforming \"%s\" to \"%s\"", *header, newheader);
freez(*header);
*header = newheader;
}
- else if ( 0 == matches )
+ else if (0 == matches)
{
/* Filter doesn't change header */
freez(newheader);
* to remove the header. This function frees the
* original string if necessary.
*
- * Returns : JB_ERR_OK on success, or
- * JB_ERR_MEMORY on out-of-memory error.
+ * Returns : JB_ERR_OK on success.
*
*********************************************************************/
static jb_err server_connection(struct client_state *csp, char **header)
#ifdef FEATURE_CONNECTION_KEEP_ALIVE
&& !(csp->flags & CSP_FLAG_SERVER_SOCKET_TAINTED)
#endif
- )
+ )
{
#ifdef FEATURE_CONNECTION_KEEP_ALIVE
if ((csp->config->feature_flags & RUNTIME_FEATURE_CONNECTION_KEEP_ALIVE))
{
char *old_header = *header;
- *header = strdup("Connection: close");
- if (header == NULL)
- {
- return JB_ERR_MEMORY;
- }
+ *header = strdup_or_die("Connection: close");
log_error(LOG_LEVEL_HEADER, "Replaced: \'%s\' with \'%s\'", old_header, *header);
freez(old_header);
}
* Content-Length header.
*
* Parameters :
- * 1 : header = The Content-Length header.
+ * 1 : header_value = The Content-Length header value.
* 2 : length = Storage to return the value.
*
* Returns : JB_ERR_OK on success, or
* JB_ERR_PARSE if no value is recognized.
*
*********************************************************************/
-static jb_err get_content_length(const char *header, unsigned long long *length)
+static jb_err get_content_length(const char *header_value, unsigned long long *length)
{
- assert(header[14] == ':');
-
#ifdef _WIN32
assert(sizeof(unsigned long long) > 4);
- if (1 != sscanf(header+14, ": %I64u", length))
+ if (1 != sscanf(header_value, "%I64u", length))
#else
- if (1 != sscanf(header+14, ": %llu", length))
+ if (1 != sscanf(header_value, "%llu", length))
#endif
{
return JB_ERR_PARSE;
static jb_err client_save_content_length(struct client_state *csp, char **header)
{
unsigned long long content_length = 0;
+ const char *header_value;
assert(*(*header+14) == ':');
- if (JB_ERR_OK != get_content_length(*header, &content_length))
+ header_value = *header + 15;
+ if (JB_ERR_OK != get_content_length(header_value, &content_length))
{
log_error(LOG_LEVEL_ERROR, "Crunching invalid header: %s", *header);
freez(*header);
* to remove the header. This function frees the
* original string if necessary.
*
- * Returns : JB_ERR_OK on success, or
- * JB_ERR_MEMORY on out-of-memory error.
+ * Returns : JB_ERR_OK on success.
*
*********************************************************************/
static jb_err client_connection(struct client_state *csp, char **header)
if (!strcmpic(*header, connection_close))
{
#ifdef FEATURE_CONNECTION_KEEP_ALIVE
- if ((csp->config->feature_flags & RUNTIME_FEATURE_CONNECTION_SHARING))
+ if ((csp->config->feature_flags & RUNTIME_FEATURE_CONNECTION_SHARING)
+ && !(csp->flags & CSP_FLAG_SERVER_SOCKET_TAINTED))
{
if (!strcmpic(csp->http->ver, "HTTP/1.1"))
{
log_error(LOG_LEVEL_HEADER,
"Removing \'%s\' to imply keep-alive.", *header);
freez(*header);
+ /*
+ * While we imply keep-alive to the server,
+ * we have to remember that the client didn't.
+ */
+ csp->flags &= ~CSP_FLAG_CLIENT_CONNECTION_KEEP_ALIVE;
}
else
{
char *old_header = *header;
- *header = strdup("Connection: keep-alive");
- if (header == NULL)
- {
- return JB_ERR_MEMORY;
- }
+ *header = strdup_or_die("Connection: keep-alive");
log_error(LOG_LEVEL_HEADER,
"Replaced: \'%s\' with \'%s\'", old_header, *header);
freez(old_header);
csp->flags &= ~CSP_FLAG_CLIENT_CONNECTION_KEEP_ALIVE;
}
}
- else if ((csp->config->feature_flags & RUNTIME_FEATURE_CONNECTION_KEEP_ALIVE))
+ else if ((csp->config->feature_flags & RUNTIME_FEATURE_CONNECTION_KEEP_ALIVE)
+ && !(csp->flags & CSP_FLAG_SERVER_SOCKET_TAINTED))
{
log_error(LOG_LEVEL_HEADER,
"Keeping the client header '%s' around. "
{
char *old_header = *header;
- *header = strdup(connection_close);
- if (header == NULL)
- {
- return JB_ERR_MEMORY;
- }
+ *header = strdup_or_die(connection_close);
log_error(LOG_LEVEL_HEADER,
"Replaced: \'%s\' with \'%s\'", old_header, *header);
freez(old_header);
if ((csp->content_type & CT_TEXT) || (csp->action->flags & ACTION_FORCE_TEXT_MODE))
{
freez(*header);
- *header = strdup("Content-Type: ");
+ *header = strdup_or_die("Content-Type: ");
string_append(header, csp->action->string[ACTION_STRING_CONTENT_TYPE]);
if (header == NULL)
static jb_err server_save_content_length(struct client_state *csp, char **header)
{
unsigned long long content_length = 0;
+ const char *header_value;
assert(*(*header+14) == ':');
- if (JB_ERR_OK != get_content_length(*header, &content_length))
+ header_value = *header + 15;
+ if (JB_ERR_OK != get_content_length(header_value, &content_length))
{
log_error(LOG_LEVEL_ERROR, "Crunching invalid header: %s", *header);
freez(*header);
static jb_err server_last_modified(struct client_state *csp, char **header)
{
const char *newval;
- char buf[BUFFER_SIZE];
time_t last_modified;
char newheader[50];
newval = csp->action->string[ACTION_STRING_LAST_MODIFIED];
- if (0 == strcmpic(newval, "block") )
+ if (0 == strcmpic(newval, "block"))
{
/*
* Blocking Last-Modified header. Useless but why not.
/*
* Setting Last-Modified Header to now.
*/
+ char buf[30];
get_http_time(0, buf, sizeof(buf));
freez(*header);
*header = strdup("Last-Modified: ");
newval = csp->action->string[ACTION_STRING_LANGUAGE];
- if ((newval == NULL) || (0 == strcmpic(newval, "block")) )
+ if ((newval == NULL) || (0 == strcmpic(newval, "block")))
{
/*
* Blocking Accept-Language header
/*
* Are we blocking the e-mail address?
*/
- if ((newval == NULL) || (0 == strcmpic(newval, "block")) )
+ if ((newval == NULL) || (0 == strcmpic(newval, "block")))
{
log_error(LOG_LEVEL_HEADER, "crunched From!");
return JB_ERR_OK;
*csp->http->hostport == ' ' || *csp->http->hostport == '\0')
{
- if (NULL == (p = strdup((*header)+6)))
- {
- return JB_ERR_MEMORY;
- }
+ p = strdup_or_die((*header)+6);
chomp(p);
- if (NULL == (q = strdup(p)))
- {
- freez(p);
- return JB_ERR_MEMORY;
- }
+ q = strdup_or_die(p);
freez(csp->http->hostport);
csp->http->hostport = p;
const char *newval;
char * endptr;
- if ( 0 == strcmpic(*header, "If-Modified-Since: Wed, 08 Jun 1955 12:00:00 GMT"))
+ if (0 == strcmpic(*header, "If-Modified-Since: Wed, 08 Jun 1955 12:00:00 GMT"))
{
/*
* The client got an error message because of a temporary problem,
*********************************************************************/
jb_err client_x_filter(struct client_state *csp, char **header)
{
- if ( 0 == strcmpic(*header, "X-Filter: No"))
+ if (0 == strcmpic(*header, "X-Filter: No"))
{
if (!(csp->config->feature_flags & RUNTIME_FEATURE_HTTP_TOGGLE))
{
* Function : client_range
*
* Description : Removes Range, Request-Range and If-Range headers if
- * content filtering is enabled. If the client's version
- * of the document has been altered by Privoxy, the server
- * could interpret the range differently than the client
- * intended in which case the user could end up with
- * corrupted content.
+ * content filtering is enabled and the range doesn't
+ * start at byte 0.
+ *
+ * If the client's version of the document has been
+ * altered by Privoxy, the server could interpret the
+ * range differently than the client intended in which
+ * case the user could end up with corrupted content.
+ *
+ * If the range starts at byte 0 this isn't an issue
+ * so the header can pass. Partial requests like this
+ * are used to render preview images for videos without
+ * downloading the whole video.
+ *
+ * While HTTP doesn't require that range requests are
+ * honoured and the client could simply abort the download
+ * after receiving a sufficient amount of data, various
+ * clients don't handle complete responses to range
+ * requests gracefully and emit misleading error messages
+ * instead.
*
* Parameters :
* 1 : csp = Current client state (buffers, headers, etc...)
*********************************************************************/
static jb_err client_range(struct client_state *csp, char **header)
{
- if (content_filters_enabled(csp->action))
+ if (content_filters_enabled(csp->action)
+ && (0 != strncmpic(strstr(*header, ":"), ": bytes=0-", 10)))
{
log_error(LOG_LEVEL_HEADER, "Content filtering is enabled."
" Crunching: \'%s\' to prevent range-mismatch problems.", *header);
return JB_ERR_OK;
}
- if ( !csp->http->hostport || !*(csp->http->hostport))
+ if (!csp->http->hostport || !*(csp->http->hostport))
{
/* XXX: When does this happen and why is it OK? */
log_error(LOG_LEVEL_INFO, "Weirdness in client_host_adder detected and ignored.");
/*
* remove 'user:pass@' from 'proto://user:pass@host'
*/
- if ( (p = strchr( csp->http->hostport, '@')) != NULL )
+ if ((p = strchr( csp->http->hostport, '@')) != NULL)
{
p++;
}
}
-#if 0
-/*********************************************************************
- *
- * Function : client_accept_encoding_adder
- *
- * Description : Add an Accept-Encoding header to the client's request
- * that disables compression if the action applies, and
- * the header is not already there. Called from `sed'.
- * Note: For HTTP/1.0, the absence of the header is enough.
- *
- * Parameters :
- * 1 : csp = Current client state (buffers, headers, etc...)
- *
- * Returns : JB_ERR_OK on success, or
- * JB_ERR_MEMORY on out-of-memory error.
- *
- *********************************************************************/
-static jb_err client_accept_encoding_adder(struct client_state *csp)
-{
- if ( ((csp->action->flags & ACTION_NO_COMPRESSION) != 0)
- && (!strcmpic(csp->http->ver, "HTTP/1.1")) )
- {
- return enlist_unique(csp->headers, "Accept-Encoding: identity;q=1.0, *;q=0", 16);
- }
-
- return JB_ERR_OK;
-}
-#endif
-
-
/*********************************************************************
*
* Function : client_xtra_adder
#ifdef FEATURE_CONNECTION_KEEP_ALIVE
&& !(csp->flags & CSP_FLAG_SERVER_SOCKET_TAINTED)
#endif
- )
+ )
{
log_error(LOG_LEVEL_HEADER, "A HTTP/1.1 response "
"without Connection header implies keep-alive.");
static const char connection_close[] = "Connection: close";
if (!(csp->flags & CSP_FLAG_CLIENT_HEADER_PARSING_DONE)
+ && !(csp->flags & CSP_FLAG_SERVER_SOCKET_TAINTED)
&& (csp->flags & CSP_FLAG_CLIENT_CONNECTION_HEADER_SET))
{
return JB_ERR_OK;
#ifdef FEATURE_CONNECTION_KEEP_ALIVE
if ((csp->config->feature_flags & RUNTIME_FEATURE_CONNECTION_KEEP_ALIVE)
+ && !(csp->flags & CSP_FLAG_SERVER_SOCKET_TAINTED)
&& (csp->http->ssl == 0)
&& !strcmpic(csp->http->ver, "HTTP/1.1"))
{
cur_tag = *header + 11;
/* skip whitespace between "Set-Cookie:" and value */
- while (*cur_tag && ijb_isspace(*cur_tag))
+ while (*cur_tag && privoxy_isspace(*cur_tag))
{
cur_tag++;
}
next_tag++;
/* skip whitespace ";" and start of tag */
- while (*next_tag && ijb_isspace(*next_tag))
+ while (*next_tag && privoxy_isspace(*next_tag))
{
next_tag++;
}
* attempt to work around GNU libc's strptime()
* reporting negative year values when using %Y.
*/
- static const char *time_formats[] = {
+ static const char * const time_formats[] = {
/* Tue, 02-Jun-37 20:00:00 */
"%a, %d-%b-%y %H:%M:%S",
/* Tue, 02 Jun 2037 20:00:00 */
};
unsigned int i;
- /*
- * Zero out gmt to prevent time zone offsets.
- * Documented to be required for GNU libc.
- */
- memset(&gmt, 0, sizeof(gmt));
-
for (i = 0; i < SZ(time_formats); i++)
{
+ /*
+ * Zero out gmt to prevent time zone offsets.
+ * Documented to be required for GNU libc.
+ */
+ memset(&gmt, 0, sizeof(gmt));
+
if (NULL != strptime(header_time, time_formats[i], &gmt))
{
/* Sanity check for GNU libc. */
* the "Host:" header)
* 2 : http = storage for the result (host, port and hostport).
*
- * Returns : JB_ERR_MEMORY in case of memory problems,
+ * Returns : JB_ERR_MEMORY (or terminates) in case of memory problems,
* JB_ERR_PARSE if the host header couldn't be found,
* JB_ERR_OK otherwise.
*
return JB_ERR_PARSE;
}
- p = strdup(host);
- if (NULL == p)
- {
- log_error(LOG_LEVEL_ERROR, "Out of memory while parsing \"Host:\" header");
- return JB_ERR_MEMORY;
- }
+ p = strdup_or_die(host);
chomp(p);
- if (NULL == (q = strdup(p)))
- {
- freez(p);
- log_error(LOG_LEVEL_ERROR, "Out of memory while parsing \"Host:\" header");
- return JB_ERR_MEMORY;
- }
+ q = strdup_or_die(p);
freez(http->hostport);
http->hostport = p;
* Function : create_forged_referrer
*
* Description : Helper for client_referrer to forge a referer as
- * 'http://[hostname:port/' to fool stupid
+ * 'http://hostname[:port]/' to fool stupid
* checks for in-site links
*
* Parameters :
static jb_err handle_conditional_hide_referrer_parameter(char **header,
const char *host, const int parameter_conditional_block)
{
- char *referer = strdup(*header);
+ char *referer = strdup_or_die(*header);
const size_t hostlength = strlen(host);
const char *referer_url = NULL;
- if (NULL == referer)
- {
- freez(*header);
- return JB_ERR_MEMORY;
- }
-
/* referer begins with 'Referer: http[s]://' */
if ((hostlength+17) < strlen(referer))
{
}
+/*********************************************************************
+ *
+ * Function : get_expected_content_length
+ *
+ * Description : Figures out the content length from a list of headers.
+ *
+ * Parameters :
+ * 1 : headers = List of headers
+ *
+ * Returns : Number of bytes to expect
+ *
+ *********************************************************************/
+unsigned long long get_expected_content_length(struct list *headers)
+{
+ const char *content_length_header;
+ unsigned long long content_length = 0;
+
+ content_length_header = get_header_value(headers, "Content-Length:");
+ if (content_length_header != NULL)
+ {
+ if (JB_ERR_OK != get_content_length(content_length_header, &content_length))
+ {
+ log_error(LOG_LEVEL_ERROR,
+ "Failed to get the Content-Length in %s", content_length_header);
+ /* XXX: The header will be removed later on */
+ return 0;
+ }
+ }
+
+ return content_length;
+}
+
+
/*
Local Variables:
tab-width: 3