#! /bin/sh /usr/share/dpatch/dpatch-run ## 16_gzip.dpatch by Wil Mahan ## ## All lines beginning with `## DP:' are a description of the patch. ## DP: Add deflate-filter support to privoxy, see sf bug id 895531 ## DP: Adapted from privoxy 3.1 to 3.0.x by Roland Rosenfeld. ## DP: This requires --enable-zlib as configure option in debian/rules ## DP: and a build dependeny on zlib1g-dev. @DPATCH@ diff -urNad privoxy~/actionlist.h privoxy/actionlist.h --- privoxy~/actionlist.h +++ privoxy/actionlist.h @@ -116,6 +116,7 @@ DEFINE_ACTION_STRING ("deanimate-gifs", ACTION_DEANIMATE, ACTION_STRING_DEANIMATE) DEFINE_CGI_PARAM_RADIO ("deanimate-gifs", ACTION_DEANIMATE, ACTION_STRING_DEANIMATE, "first", 0) DEFINE_CGI_PARAM_RADIO ("deanimate-gifs", ACTION_DEANIMATE, ACTION_STRING_DEANIMATE, "last", 1) +DEFINE_ACTION_BOOL ("decompress-from-server", ACTION_DECOMPRESS_IN) DEFINE_ACTION_BOOL ("downgrade-http-version", ACTION_DOWNGRADE) DEFINE_ACTION_BOOL ("fast-redirects", ACTION_FAST_REDIRECTS) DEFINE_ACTION_MULTI ("filter", ACTION_MULTI_FILTER) diff -urNad privoxy~/configure.in privoxy/configure.in --- privoxy~/configure.in +++ privoxy/configure.in @@ -1234,6 +1234,20 @@ libpcrs is available], [ if test $enableval = "no"; then have_pcrs=no; fi ]) +AC_ARG_ENABLE(zlib, +[ --enable-zlib Use the zlib library to allow compressing or + decompressing data on the fly.], +[enableval2=$enableval], +[enableval2=no]) +if test $enableval2 = yes; then + AC_CHECK_LIB(z, zlibVersion, , [ + AC_MSG_ERROR([Unable to find a copy of zlib. The zlib library +is necessary to enable compresion support. ]) + ]) + AC_DEFINE(FEATURE_ZLIB,1, + [ Define to 1 to use compression through the zlib library. ]) +fi + # If we have libpcre and either we also have pcreposix or # we don't need pcreposix, then link pcre dynamically; else diff -urNad privoxy~/default.action.master privoxy/default.action.master --- privoxy~/default.action.master +++ privoxy/default.action.master @@ -452,7 +452,7 @@ -hide-user-agent \ -kill-popups \ -limit-connect \ -+prevent-compression \ +-prevent-compression \ -send-vanilla-wafer \ -send-wafer \ +session-cookies-only \ diff -urNad privoxy~/filters.c privoxy/filters.c --- privoxy~/filters.c +++ privoxy/filters.c @@ -1325,6 +1325,38 @@ return(NULL); } +#ifdef FEATURE_ZLIB + /* If the body has a compressed transfer-encoding, uncompress + * it first, adjusting size and iob->eod. Note that + * decompression occurs after de-chunking. + */ + if (csp->content_type & CT_GZIP || csp->content_type & CT_DEFLATE) + { + /* Notice that we at least tried to decompress. */ + if (JB_ERR_OK != decompress_iob(csp)) + { + /* We failed to decompress the data; there's no point + * in continuing since we can't filter. This is + * slightly tricky because we need to remember not to + * modify the Content-Encoding header later; using + * CT_TABOO flag is a kludge for this purpose. + */ + csp->content_type |= CT_TABOO; + return(NULL); + } + log_error(LOG_LEVEL_RE_FILTER, "Decompressing successful"); + + /* Decompression gives us a completely new iob, so we + * need to update. + */ + size = csp->iob->eod - csp->iob->cur; + old = csp->iob->cur; + + csp->flags |= CSP_FLAG_MODIFIED; + } +#endif + + /* * If the body has a "chunked" transfer-encoding, * get rid of it first, adjusting size and iob->eod diff -urNad privoxy~/jcc.c privoxy/jcc.c --- privoxy~/jcc.c +++ privoxy/jcc.c @@ -659,6 +659,10 @@ # include # endif +#ifdef FEATURE_ZLIB +#include +#endif + #endif #include "project.h" @@ -1609,6 +1613,8 @@ if ((csp->content_type & CT_TEXT) && /* It's a text / * MIME-Type */ !http->ssl && /* We talk plaintext */ + !(csp->content_type & CT_GZIP) && + !(csp->content_type & CT_DEFLATE) && block_popups) /* Policy allows */ { block_popups_now = 1; diff -urNad privoxy~/parsers.c privoxy/parsers.c --- privoxy~/parsers.c +++ privoxy/parsers.c @@ -433,6 +433,10 @@ #include #include +#ifdef FEATURE_ZLIB +#include +#endif + #if !defined(_WIN32) && !defined(__OS2__) #include #endif @@ -632,6 +636,281 @@ } +#ifdef FEATURE_ZLIB +/********************************************************************* + * + * Function : decompress_iob + * + * Description : Decompress buffered page, expanding the + * buffer as necessary. csp->iob->cur + * should point to the the beginning of the + * compressed data block. + * + * Parameters : + * 1 : csp = Current client state (buffers, headers, etc...) + * + * Returns : JB_ERR_OK on success, JB_ERR_MEMORY if out-of-memory + * limit reached, JB_ERR_COMPRESS if error decompressing + * buffer. + * + *********************************************************************/ +jb_err decompress_iob(struct client_state *csp) +{ + char *buf; /* new, uncompressed buffer */ + int bufsize = csp->iob->size; /* allocated size of the new buffer */ + /* Number of bytes at the beginning + * of the iob that we should NOT + * decompress. + */ + int skip_size = csp->iob->cur - csp->iob->buf; + int status; /* return status of the inflate() call */ + z_stream zstr; /* used by calls to zlib */ + + /* This is to protect the parsing of gzipped data, but it should(?) + * be valid for deflated data also. + */ + if (bufsize < 10) + { + log_error (LOG_LEVEL_ERROR, "Buffer too small decompressing iob"); + return JB_ERR_COMPRESS; + } + + if (csp->content_type & CT_GZIP) + { + /* Our task is slightly complicated by the facts that data + * compressed by gzip does not include a zlib header, and + * that there is no easily accessible interface in zlib to + * handle a gzip header. We strip off the gzip header by + * hand, and later inform zlib not to expect a header. + */ + + /* Strip off the gzip header. Please see RFC 1952 for more + * explanation of the appropriate fields. + */ + if ((*csp->iob->cur++ != (char)0x1f) + || (*csp->iob->cur++ != (char)0x8b) + || (*csp->iob->cur++ != Z_DEFLATED)) + { + log_error (LOG_LEVEL_ERROR, + "Invalid gzip header when decompressing"); + return JB_ERR_COMPRESS; + } + else { + int flags = *csp->iob->cur++; + if (flags & 0xe0) + { + /* The gzip header has reserved bits set; bail out. */ + log_error (LOG_LEVEL_ERROR, + "Invalid gzip header when decompressing"); + return JB_ERR_COMPRESS; + } + csp->iob->cur += 6; + + /* Skip extra fields if necessary. */ + if (flags & 0x04) + { + /* Skip a given number of bytes, specified as a 16-bit + * little-endian value. + */ + csp->iob->cur += *csp->iob->cur++ + (*csp->iob->cur++ << 8); + } + + /* Skip the filename if necessary. */ + if (flags & 0x08) + { + /* A null-terminated string follows. */ + while (*csp->iob->cur++); + } + + /* Skip the comment if necessary. */ + if (flags & 0x10) + { + while (*csp->iob->cur++); + } + + /* Skip the CRC if necessary. */ + if (flags & 0x02) + { + csp->iob->cur += 2; + } + } + } + else if (csp->content_type & CT_DEFLATE) + { + log_error (LOG_LEVEL_INFO, "Decompressing deflated iob: %d", *csp->iob->cur); + /* In theory (that is, according to RFC 1950), deflate-compressed + * data should begin with a two-byte zlib header and have an + * adler32 checksum at the end. It seems that in practice the + * only the raw compressed data is sent. Note that this means that + * we are not RFC 1950-compliant here, but the advantage is that + * this actually works. :) + * + * We add a dummy null byte to tell zlib where the data ends, + * and later inform it not to expect a header. + * + * Fortunately, add_to_iob() has thoughtfully null-terminated + * the buffer; we can just increment the end pointer to include + * the dummy byte. + */ + csp->iob->eod++; + } + else + { + log_error (LOG_LEVEL_ERROR, + "Unable to determine compression format for decompression"); + return JB_ERR_COMPRESS; + } + + /* Set up the fields required by zlib. */ + zstr.next_in = csp->iob->cur; + zstr.avail_in = csp->iob->eod - csp->iob->cur; + zstr.zalloc = Z_NULL; + zstr.zfree = Z_NULL; + zstr.opaque = Z_NULL; + + /* Passing -MAX_WBITS to inflateInit2 tells the library + * that there is no zlib header. + */ + if (inflateInit2 (&zstr, -MAX_WBITS) != Z_OK) + { + log_error (LOG_LEVEL_ERROR, + "Error initializing decompression"); + return JB_ERR_COMPRESS; + } + + /* Next, we allocate new storage for the inflated data. + * We don't modify the existing iob yet, so in case there + * is error in decompression we can recover gracefully. + */ + buf = zalloc (bufsize); + if (NULL == buf) + { + log_error (LOG_LEVEL_ERROR, + "Out of memory decompressing iob"); + return JB_ERR_MEMORY; + } + + assert(bufsize >= skip_size); + memcpy(buf, csp->iob->buf, skip_size); + zstr.avail_out = bufsize - skip_size; + zstr.next_out = buf + skip_size; + + /* Try to decompress the whole stream in one shot. */ + while (Z_BUF_ERROR == (status = inflate(&zstr, Z_FINISH))) + { + /* We need to allocate more memory for the output buffer. */ + + char *tmpbuf; /* used for realloc'ing the buffer */ + int oldbufsize = bufsize; /* keep track of the old bufsize */ + + /* If zlib wants more data then there's a problem, because + * the complete compressed file should have been buffered. + */ + if (0 == zstr.avail_in) + { + log_error(LOG_LEVEL_ERROR, + "Unexpected end of compressed iob"); + return JB_ERR_COMPRESS; + } + + /* If we tried the limit and still didn't have enough + * memory, just give up. + */ + if (bufsize == csp->config->buffer_limit) + { + log_error(LOG_LEVEL_ERROR, "Out of memory decompressing iob"); + return JB_ERR_MEMORY; + } + + /* Try doubling the buffer size each time. */ + bufsize *= 2; + + /* Don't exceed the buffer limit. */ + if (bufsize > csp->config->buffer_limit) + { + bufsize = csp->config->buffer_limit; + } + + /* Try to allocate the new buffer. */ + tmpbuf = realloc(buf, bufsize); + if (NULL == tmpbuf) + { + log_error(LOG_LEVEL_ERROR, "Out of memory decompressing iob"); + freez(buf); + return JB_ERR_MEMORY; + } + else + { + char *oldnext_out = zstr.next_out; + + /* Update the fields for inflate() to use the new + * buffer, which may be in a different location from + * the old one. + */ + zstr.avail_out += bufsize - oldbufsize; + zstr.next_out = tmpbuf + bufsize - zstr.avail_out; + + /* Compare with an uglier method of calculating these values + * that doesn't require the extra oldbufsize variable. + */ + assert(zstr.avail_out == + tmpbuf + bufsize - (char *)zstr.next_out); + assert((char *)zstr.next_out == + tmpbuf + ((char *)oldnext_out - buf)); + assert(zstr.avail_out > 0); + + buf = tmpbuf; + } + } + + inflateEnd(&zstr); + if (status != Z_STREAM_END) + { + /* We failed to decompress the stream. */ + log_error(LOG_LEVEL_ERROR, + "Error in decompressing to the buffer (iob): %s", + zstr.msg); + return JB_ERR_COMPRESS; + } + + /* Finally, we can actually update the iob, since the + * decompression was successful. First, free the old + * buffer. + */ + freez(csp->iob->buf); + + /* Now, update the iob to use the new buffer. */ + csp->iob->buf = buf; + csp->iob->cur = csp->iob->buf + skip_size; + csp->iob->eod = zstr.next_out; + csp->iob->size = bufsize; + + /* Make sure the new uncompressed iob obeys some minimal + * consistency conditions. + */ + if ((csp->iob->buf < csp->iob->cur) + && (csp->iob->cur <= csp->iob->eod) + && (csp->iob->eod <= csp->iob->buf + csp->iob->size)) + { + char t = csp->iob->cur[100]; + csp->iob->cur[100] = 0; + log_error(LOG_LEVEL_INFO, + "Sucessfully decompressed: %s", csp->iob->cur); + csp->iob->cur[100] = t; + return JB_ERR_OK; + } + else + { + /* It seems that zlib did something weird. */ + log_error(LOG_LEVEL_ERROR, + "Unexpected error decompressing the buffer (iob): %d==%d, %d>%d, %d<%d", csp->iob->cur, csp->iob->buf + skip_size, csp->iob->eod, csp->iob->buf, csp->iob->eod, csp->iob->buf + csp->iob->size); + return JB_ERR_COMPRESS; + } + +} +#endif /* defined(FEATURE_ZLIB) */ + + /********************************************************************* * * Function : get_header @@ -936,13 +1215,59 @@ *********************************************************************/ jb_err server_content_encoding(struct client_state *csp, char **header) { +#ifdef FEATURE_ZLIB + if (strstr(*header, "gzip")) + { + /* + * If the body was modified, we have tried to + * decompress it, so adjust the header if necessary. + */ + if ((csp->flags & CSP_FLAG_MODIFIED) /* we attempted to decompress */ + && !(csp->content_type & CT_TABOO)) /* decompression was successful */ + { + freez(*header); + *header = strdup("Content-Encoding: identity"); + return (header == NULL) ? JB_ERR_MEMORY : JB_ERR_OK; + } + else + { + csp->content_type |= CT_GZIP; + } + } + else if (strstr(*header, "deflate")) + { + /* + * If the body was modified, we have tried to + * decompress it, so adjust the header if necessary. + */ + if ((csp->flags & CSP_FLAG_MODIFIED) /* we attempted to decompress */ + && !(csp->content_type & CT_TABOO)) /* decompression was successful */ + { + freez(*header); + *header = strdup("Content-Encoding: identity"); + return (header == NULL) ? JB_ERR_MEMORY : JB_ERR_OK; + } + else + { + csp->content_type |= CT_DEFLATE; + } + } + else if (strstr(*header, "compress")) + { + /* We can't decompress this; therefore we can't filter + * it either. + */ + csp->content_type |= CT_TABOO; + } +#else /* !defined(FEATURE_GZIP) */ /* * Turn off pcrs and gif filtering if body compressed */ if (strstr(*header, "gzip") || strstr(*header, "compress") || strstr(*header, "deflate")) { - csp->content_type = CT_TABOO; + csp->content_type |= CT_TABOO; } +#endif /* !defined(FEATURE_GZIP) */ return JB_ERR_OK; diff -urNad privoxy~/parsers.h privoxy/parsers.h --- privoxy~/parsers.h +++ privoxy/parsers.h @@ -194,6 +194,7 @@ extern int flush_socket(jb_socket fd, struct client_state *csp); extern jb_err add_to_iob(struct client_state *csp, char *buf, int n); +extern jb_err decompress_iob(struct client_state *csp); extern char *get_header(struct client_state *csp); extern char *get_header_value(const struct list *header_list, const char *header_name); extern char *sed(const struct parsers pats[], const add_header_func_ptr more_headers[], struct client_state *csp); diff -urNad privoxy~/project.h privoxy/project.h --- privoxy~/project.h +++ privoxy/project.h @@ -563,7 +563,7 @@ #define JB_ERR_PARSE 4 /**< Error parsing file */ #define JB_ERR_MODIFIED 5 /**< File has been modified outside of the CGI actions editor. */ - +#define JB_ERR_COMPRESS 6 /**< Error on decompression */ /** * This macro is used to free a pointer that may be NULL. @@ -818,6 +818,15 @@ #define CT_TABOO 4 /**< csp->content_type bitmask: DO NOT filter, irrespective of other flags. */ +/* Although these are not, strictly speaking, content types + * (they are content encodings), it is simple to handle + * them as such. + */ +#define CT_GZIP 8 /**< csp->content_type bitmask: + gzip-compressed data. */ +#define CT_DEFLATE 16 /**< csp->content_type bitmask: + zlib-compressed data. */ + /** * The mask which includes all actions. */ @@ -862,6 +871,8 @@ #define ACTION_VANILLA_WAFER 0x00008000UL /** Action bitmap: Limit CONNECT requests to safe ports. */ #define ACTION_LIMIT_CONNECT 0x00010000UL +/** Action bitmap: Uncompress incoming text for filtering. */ +#define ACTION_DECOMPRESS_IN 0x00020000UL /** Action string index: How to deanimate GIFs */ #define ACTION_STRING_DEANIMATE 0