X-Git-Url: http://www.privoxy.org/gitweb/?a=blobdiff_plain;f=filters.c;h=744af6a204abdd067c6a1e0f7bedeb1dcba871fa;hb=701f0d2c06084708ab71fe06ded88d4b666dc826;hp=e268d6ddcd2659c7aa121d5575d44b80e6381bc0;hpb=c4f3088d0fc3e1d5d4cd211295e641816ed11979;p=privoxy.git diff --git a/filters.c b/filters.c deleted file mode 100644 index e268d6dd..00000000 --- a/filters.c +++ /dev/null @@ -1,1363 +0,0 @@ -const char filters_rcs[] = "$Id: filters.c,v 1.24 2001/07/25 17:22:51 oes Exp $"; -/********************************************************************* - * - * File : $Source: /cvsroot/ijbswa/current/filters.c,v $ - * - * Purpose : Declares functions to parse/crunch headers and pages. - * Functions declared include: - * `acl_addr', `add_stats', `block_acl', `block_imageurl', - * `block_url', `url_actions', `domaincmp', `dsplit', - * `filter_popups', `forward_url', 'redirect_url', - * `ij_untrusted_url', `intercept_url', `pcrs_filter_respose', - * `show_proxy_args', 'ijb_send_banner', and `trust_url' - * - * Copyright : Written by and Copyright (C) 2001 the SourceForge - * IJBSWA team. http://ijbswa.sourceforge.net - * - * Based on the Internet Junkbuster originally written - * by and Copyright (C) 1997 Anonymous Coders and - * Junkbusters Corporation. http://www.junkbusters.com - * - * This program is free software; you can redistribute it - * and/or modify it under the terms of the GNU General - * Public License as published by the Free Software - * Foundation; either version 2 of the License, or (at - * your option) any later version. - * - * This program is distributed in the hope that it will - * be useful, but WITHOUT ANY WARRANTY; without even the - * implied warranty of MERCHANTABILITY or FITNESS FOR A - * PARTICULAR PURPOSE. See the GNU General Public - * License for more details. - * - * The GNU General Public License should be included with - * this file. 
If not, you can view it at - * http://www.gnu.org/copyleft/gpl.html - * or write to the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Revisions : - * $Log: filters.c,v $ - * Revision 1.24 2001/07/25 17:22:51 oes - * Added workaround for Netscape bug that prevents display of page when loading a component fails. - * - * Revision 1.23 2001/07/23 13:40:12 oes - * Fixed bug that caused document body to be dropped when pcrs joblist was empty. - * - * Revision 1.22 2001/07/18 12:29:34 oes - * - Made gif_deanimate_response respect - * csp->action->string[ACTION_STRING_DEANIMATE] - * - Logging cosmetics - * - * Revision 1.21 2001/07/13 13:59:53 oes - * - Introduced gif_deanimate_response which shares the - * generic content modification interface of pcrs_filter_response - * and acts as a wrapper to deanimate.c:gif_deanimate() - * - Renamed re_process_buffer to pcrs_filter_response - * - pcrs_filter_response now returns NULL on failiure - * - Removed all #ifdef PCRS - * - * Revision 1.20 2001/07/01 17:01:04 oes - * Added comments and missing return statement in is_untrusted_url() - * - * Revision 1.19 2001/06/29 21:45:41 oes - * Indentation, CRLF->LF, Tab-> Space - * - * Revision 1.18 2001/06/29 13:27:38 oes - * - Cleaned up, renamed and reorderd functions - * and improved comments - * - * - block_url: - * - Ported to CGI platform. Now delivers - * http_response or NULL - * - Unified HTML and GIF generation (moved image detection - * and GIF generation here from jcc.c:chat()) - * - Fixed HTTP status to: - * - 403 (Forbidden) for the "blocked" HTML message - * - 200 (OK) for GIF answers - * - 302 (Redirect) for redirect to GIF - * - * - trust_url: - * - Ported to CGI platform. 
Now delivers - * http_response or NULL - * - Separated detection of untrusted URL into - * (bool)is_untrusted_url - * - Added enforcement of untrusted requests - * - * - Moved redirect_url() from cgi.c to here - * and ported it to the CGI platform - * - * - Removed logentry from cancelled commit - * - * Revision 1.17 2001/06/09 10:55:28 jongfoster - * Changing BUFSIZ ==> BUFFER_SIZE - * - * Revision 1.16 2001/06/07 23:10:26 jongfoster - * Allowing unanchored domain patterns to back off and retry - * if they partially match. Optimized right-anchored patterns. - * Moving ACL and forward files into config file. - * Replacing struct gateway with struct forward_spec - * - * Revision 1.15 2001/06/03 19:12:00 oes - * extracted-CGI relevant stuff - * - * Revision 1.14 2001/06/01 10:30:55 oes - * Added optional left-anchoring to domaincmp - * - * Revision 1.13 2001/05/31 21:21:30 jongfoster - * Permissionsfile / actions file changes: - * - Changed "permission" to "action" throughout - * - changes to file format to allow string parameters - * - Moved helper functions to actions.c - * - * Revision 1.12 2001/05/31 17:35:20 oes - * - * - Enhanced domain part globbing with infix and prefix asterisk - * matching and optional unanchored operation - * - * Revision 1.11 2001/05/29 11:53:23 oes - * "See why" link added to "blocked" page - * - * Revision 1.10 2001/05/29 09:50:24 jongfoster - * Unified blocklist/imagelist/permissionslist. - * File format is still under discussion, but the internal changes - * are (mostly) done. - * - * Also modified interceptor behaviour: - * - We now intercept all URLs beginning with one of the following - * prefixes (and *only* these prefixes): - * * http://i.j.b/ - * * http://ijbswa.sf.net/config/ - * * http://ijbswa.sourceforge.net/config/ - * - New interceptors "home page" - go to http://i.j.b/ to see it. - * - Internal changes so that intercepted and fast redirect pages - * are not replaced with an image. 
- * - Interceptors now have the option to send a binary page direct - * to the client. (i.e. ijb-send-banner uses this) - * - Implemented show-url-info interceptor. (Which is why I needed - * the above interceptors changes - a typical URL is - * "http://i.j.b/show-url-info?url=www.somesite.com/banner.gif". - * The previous mechanism would not have intercepted that, and - * if it had been intercepted then it then it would have replaced - * it with an image.) - * - * Revision 1.9 2001/05/27 22:17:04 oes - * - * - re_process_buffer no longer writes the modified buffer - * to the client, which was very ugly. It now returns the - * buffer, which it is then written by chat. - * - * - content_length now adjusts the Content-Length: header - * for modified documents rather than crunch()ing it. - * (Length info in csp->content_length, which is 0 for - * unmodified documents) - * - * - For this to work, sed() is called twice when filtering. - * - * Revision 1.8 2001/05/26 17:13:28 jongfoster - * Filled in a function comment. - * - * Revision 1.7 2001/05/26 15:26:15 jongfoster - * ACL feature now provides more security by immediately dropping - * connections from untrusted hosts. - * - * Revision 1.6 2001/05/26 00:28:36 jongfoster - * Automatic reloading of config file. - * Removed obsolete SIGHUP support (Unix) and Reload menu option (Win32). - * Most of the global variables have been moved to a new - * struct configuration_spec, accessed through csp->config->globalname - * Most of the globals remaining are used by the Win32 GUI. 
- * - * Revision 1.5 2001/05/25 22:34:30 jongfoster - * Hard tabs->Spaces - * - * Revision 1.4 2001/05/22 18:46:04 oes - * - * - Enabled filtering banners by size rather than URL - * by adding patterns that replace all standard banner - * sizes with the "Junkbuster" gif to the re_filterfile - * - * - Enabled filtering WebBugs by providing a pattern - * which kills all 1x1 images - * - * - Added support for PCRE_UNGREEDY behaviour to pcrs, - * which is selected by the (nonstandard and therefore - * capital) letter 'U' in the option string. - * It causes the quantifiers to be ungreedy by default. - * Appending a ? turns back to greedy (!). - * - * - Added a new interceptor ijb-send-banner, which - * sends back the "Junkbuster" gif. Without imagelist or - * MSIE detection support, or if tinygif = 1, or the - * URL isn't recognized as an imageurl, a lame HTML - * explanation is sent instead. - * - * - Added new feature, which permits blocking remote - * script redirects and firing back a local redirect - * to the browser. - * The feature is conditionally compiled, i.e. it - * can be disabled with --disable-fast-redirects, - * plus it must be activated by a "fast-redirects" - * line in the config file, has its own log level - * and of course wants to be displayed by show-proxy-args - * Note: Boy, all the #ifdefs in 1001 locations and - * all the fumbling with configure.in and acconfig.h - * were *way* more work than the feature itself :-( - * - * - Because a generic redirect template was needed for - * this, tinygif = 3 now uses the same. - * - * - Moved GIFs, and other static HTTP response templates - * to project.h - * - * - Some minor fixes - * - * - Removed some >400 CRs again (Jon, you really worked - * a lot! ;-) - * - * Revision 1.3 2001/05/20 16:44:47 jongfoster - * Removing last hardcoded JunkBusters.com URLs. - * - * Revision 1.2 2001/05/20 01:21:20 jongfoster - * Version 2.9.4 checkin. 
- * - Merged popupfile and cookiefile, and added control over PCRS - * filtering, in new "permissionsfile". - * - Implemented LOG_LEVEL_FATAL, so that if there is a configuration - * file error you now get a message box (in the Win32 GUI) rather - * than the program exiting with no explanation. - * - Made killpopup use the PCRS MIME-type checking and HTTP-header - * skipping. - * - Removed tabs from "config" - * - Moved duplicated url parsing code in "loaders.c" to a new funcition. - * - Bumped up version number. - * - * Revision 1.1.1.1 2001/05/15 13:58:52 oes - * Initial import of version 2.9.3 source tree - * - * - *********************************************************************/ - - -#include "config.h" - -#include -#include -#include -#include -#include - -#ifndef _WIN32 -#include -#include -#else -#include -#endif - -#include "project.h" -#include "filters.h" -#include "encode.h" -#include "jcc.h" -#include "showargs.h" -#include "parsers.h" -#include "ssplit.h" -#include "gateway.h" -#include "jbsockets.h" -#include "errlog.h" -#include "jbsockets.h" -#include "miscutil.h" -#include "actions.h" -#include "cgi.h" -#include "list.h" -#include "deanimate.h" - -#ifdef _WIN32 -#include "win32.h" -#endif - -const char filters_h_rcs[] = FILTERS_H_VERSION; - -/* Fix a problem with Solaris. There should be no effect on other - * platforms. - * Solaris's isspace() is a macro which uses it's argument directly - * as an array index. Therefore we need to make sure that high-bit - * characters generate +ve values, and ideally we also want to make - * the argument match the declared parameter type of "int". - */ -#define ijb_isdigit(__X) isdigit((int)(unsigned char)(__X)) - - -#ifdef ACL_FILES -/********************************************************************* - * - * Function : block_acl - * - * Description : Block this request? - * Decide yes or no based on ACL file. - * - * Parameters : - * 1 : dst = The proxy or gateway address this is going to. 
- * Or NULL to check all possible targets. - * 2 : csp = Current client state (buffers, headers, etc...) - * Also includes the client IP address. - * - * Returns : 0 = FALSE (don't block) and 1 = TRUE (do block) - * - *********************************************************************/ -int block_acl(struct access_control_addr *dst, struct client_state *csp) -{ - struct access_control_list *acl = csp->config->acl; - - /* if not using an access control list, then permit the connection */ - if (acl == NULL) - { - return(0); - } - - /* search the list */ - while (acl != NULL) - { - if ((csp->ip_addr_long & acl->src->mask) == acl->src->addr) - { - if (dst == NULL) - { - /* Just want to check if they have any access */ - if (acl->action == ACL_PERMIT) - { - return(0); - } - } - else if ( ((dst->addr & acl->dst->mask) == acl->dst->addr) - && ((dst->port == acl->dst->port) || (acl->dst->port == 0))) - { - if (acl->action == ACL_PERMIT) - { - return(0); - } - else - { - return(1); - } - } - } - acl = acl->next; - } - - return(1); - -} - - -/********************************************************************* - * - * Function : acl_addr - * - * Description : Called from `load_aclfile' to parse an ACL address. - * - * Parameters : - * 1 : aspec = String specifying ACL address. - * 2 : aca = struct access_control_addr to fill in. - * - * Returns : 0 => Ok, everything else is an error. 
- * - *********************************************************************/ -int acl_addr(char *aspec, struct access_control_addr *aca) -{ - int i, masklength, port; - char *p; - - masklength = 32; - port = 0; - - if ((p = strchr(aspec, '/'))) - { - *p++ = '\0'; - - if (ijb_isdigit(*p) == 0) - { - return(-1); - } - masklength = atoi(p); - } - - if ((masklength < 0) || (masklength > 32)) - { - return(-1); - } - - if ((p = strchr(aspec, ':'))) - { - *p++ = '\0'; - - if (ijb_isdigit(*p) == 0) - { - return(-1); - } - port = atoi(p); - } - - aca->port = port; - - aca->addr = ntohl(resolve_hostname_to_ip(aspec)); - - if (aca->addr == -1) - { - log_error(LOG_LEVEL_ERROR, "can't resolve address for %s", aspec); - return(-1); - } - - /* build the netmask */ - aca->mask = 0; - for (i=1; i <= masklength ; i++) - { - aca->mask |= (1 << (32 - i)); - } - - /* now mask off the host portion of the ip address - * (i.e. save on the network portion of the address). - */ - aca->addr = aca->addr & aca->mask; - - return(0); - -} -#endif /* def ACL_FILES */ - - -/********************************************************************* - * - * Function : block_url - * - * Description : Called from `chat'. Check to see if we need to block this. - * - * Parameters : - * 1 : csp = Current client state (buffers, headers, etc...) 
- * - * Returns : NULL => unblocked, else HTTP block response - * - *********************************************************************/ -struct http_response *block_url(struct client_state *csp) -{ - char *p; - struct http_response *rsp; - struct map *exports = NULL; - - /* - * If it's not blocked, don't block it ;-) - */ - if ((csp->action->flags & ACTION_BLOCK) == 0) - { - return(NULL); - } - - /* - * Else, prepare a response - */ - if (NULL == ( rsp = (struct http_response *)zalloc(sizeof(*rsp)))) - { - return NULL; - } - - /* - * If it's an image-url, send back an image or redirect - * as specified by the relevant +image action - */ -#ifdef IMAGE_BLOCKING - if (((csp->action->flags & ACTION_IMAGE_BLOCKER) != 0) - && is_imageurl(csp)) - { - /* determine HOW images should be blocked */ - p = csp->action->string[ACTION_STRING_IMAGE_BLOCKER]; - - /* and handle accordingly: */ - if ((p == NULL) || (0 == strcmpic(p, "logo"))) - { - rsp->body = bindup(JBGIF, sizeof(JBGIF)); - rsp->content_length = sizeof(JBGIF); - enlist_unique_header(rsp->headers, "Content-Type", "image/gif"); - } - - else if (0 == strcmpic(p, "blank")) - { - rsp->body = bindup(BLANKGIF, sizeof(BLANKGIF)); - rsp->content_length = sizeof(BLANKGIF); - enlist_unique_header(rsp->headers, "Content-Type", "image/gif"); - } - - else - { - rsp->status = strdup("302 Local Redirect from Junkbuster"); - enlist_unique_header(rsp->headers, "Location", p); - } - } - else -#endif /* def IMAGE_BLOCKING */ - - /* - * Else, generate an HTML "blocked" message: - */ - { - - exports = default_exports(csp, NULL); -#ifdef FORCE_LOAD - exports = map(exports, "force-prefix", 1, FORCE_PREFIX, 1); -#else - exports = map_block_killer(exports, "force-support"); -#endif /* ndef FORCE_LOAD */ - - exports = map(exports, "hostport", 1, csp->http->hostport, 1); - exports = map(exports, "hostport-html", 1, html_encode(csp->http->hostport), 0); - exports = map(exports, "path", 1, csp->http->path, 1); - exports = map(exports, 
"path-html", 1, html_encode(csp->http->path), 0); - - rsp->body = fill_template(csp, "blocked", exports); - free_map(exports); - - /* - * Workaround for stupid Netscape bug which prevents - * pages from being displayed if loading a referenced - * JavaScript or style sheet fails. So make it appear - * as if it succeeded. - */ - if (csp->http->user_agent - && !strncmpic(csp->http->user_agent, "mozilla", 7) - && !strstr(csp->http->user_agent, "compatible") - && !strstr(csp->http->user_agent, "Opera")) - { - rsp->status = strdup("200 Request for blocked URL"); - } - else - { - rsp->status = strdup("404 Request for blocked URL"); - } - - } - - return(finish_http_response(rsp)); - -} - - -#ifdef TRUST_FILES -/********************************************************************* - * - * Function : trust_url FIXME: I should be called distrust_url - * - * Description : Calls is_untrusted_url to determine if the URL is trusted - * and if not, returns a HTTP 304 response with a reject message. - * - * Parameters : - * 1 : csp = Current client state (buffers, headers, etc...) - * - * Returns : NULL => trusted, else http_response. 
- * - *********************************************************************/ -struct http_response *trust_url(struct client_state *csp) -{ - struct http_response *rsp; - struct map *exports = NULL; - char buf[BUFFER_SIZE], *p = NULL; - struct url_spec **tl, *t; - - /* - * Don't bother to work on trusted URLs - */ - if (!is_untrusted_url(csp)) - { - return NULL; - } - - /* - * Else, prepare a response: - */ - if (NULL == ( rsp = (struct http_response *)zalloc(sizeof(*rsp)))) - { - return NULL; - } - exports = default_exports(csp, NULL); - - /* - * Export the host, port, and referrer information - */ - exports = map(exports, "hostport", 1, csp->http->hostport, 1); - exports = map(exports, "path", 1, csp->http->path, 1); - exports = map(exports, "hostport-html", 1, html_encode(csp->http->hostport), 0); - exports = map(exports, "path-html", 1, html_encode(csp->http->path), 0); - - if (csp->referrer && strlen(csp->referrer) > 9) - { - exports = map(exports, "referrer", 1, csp->referrer + 9, 1); - exports = map(exports, "referrer-html", 1, html_encode(csp->referrer + 9), 0); - } - else - { - exports = map(exports, "referrer", 1, "unknown", 1); - exports = map(exports, "referrer-html", 1, "unknown", 1); - } - - /* - * Export the trust list - */ - for (tl = csp->config->trust_list; (t = *tl) ; tl++) - { - sprintf(buf, "
  • %s
  • \n", t->spec); - p = strsav(p, buf); - } - exports = map(exports, "trusted-referrers", 1, p, 0); - p = NULL; - - /* - * Export the trust info, if available - */ - if (csp->config->trust_info->next) - { - struct list *l; - - for (l = csp->config->trust_info->next; l ; l = l->next) - { - sprintf(buf, "
  • %s
    \n",l->str, l->str); - p = strsav(p, buf); - } - exports = map(exports, "trust-info", 1, p, 0); - } - else - { - exports = map_block_killer(exports, "have-trust-info"); - } - - /* - * Export the force prefix or the force conditional block killer - */ -#ifdef FORCE_LOAD - exports = map(exports, "force-prefix", 1, FORCE_PREFIX, 1); -#else - exports = map_block_killer(exports, "force-support"); -#endif /* ndef FORCE_LOAD */ - - /* - * Build the response - */ - rsp->body = fill_template(csp, "untrusted", exports); - free_map(exports); - - return(finish_http_response(rsp)); - -} -#endif /* def TRUST_FILES */ - - -#ifdef FAST_REDIRECTS -/********************************************************************* - * - * Function : redirect_url - * - * Description : Checks for redirection URLs and returns a HTTP redirect - * to the destination URL, if necessary - * - * Parameters : - * 1 : csp = Current client state (buffers, headers, etc...) - * - * Returns : NULL if URL was clean, HTTP redirect otherwise. 
- * - *********************************************************************/ -struct http_response *redirect_url(struct client_state *csp) -{ - char *p, *q; - struct http_response *rsp; - - p = q = csp->http->path; - log_error(LOG_LEVEL_REDIRECTS, "checking path for redirects: %s", p); - - /* - * find the last URL encoded in the request - */ - while (p = strstr(p, "http://")) - { - q = p++; - } - - /* - * if there was any, generate and return a HTTP redirect - */ - if (q != csp->http->path) - { - log_error(LOG_LEVEL_REDIRECTS, "redirecting to: %s", q); - - if (NULL == ( rsp = zalloc(sizeof(*rsp)))) - { - return NULL; - } - - rsp->status = strdup("302 Local Redirect from Junkbuster"); - enlist_unique_header(rsp->headers, "Location", q); - - return(finish_http_response(rsp)); - } - else - { - return(NULL); - } - -} -#endif /* def FAST_REDIRECTS */ - - -#ifdef IMAGE_BLOCKING -/********************************************************************* - * - * Function : is_imageurl - * - * Description : Given a URL, decide whether it is an image or not, - * using either the info from a previous +image action - * or, #ifdef DETECT_MSIE_IMAGES, the info from the - * browser's accept header. - * - * Parameters : - * 1 : csp = Current client state (buffers, headers, etc...) 
- * - * Returns : True (nonzero) if URL is an image, false (0) - * otherwise - * - *********************************************************************/ -int is_imageurl(struct client_state *csp) -{ -#ifdef DETECT_MSIE_IMAGES - if ((csp->accept_types - & (ACCEPT_TYPE_IS_MSIE|ACCEPT_TYPE_MSIE_IMAGE|ACCEPT_TYPE_MSIE_HTML)) - == (ACCEPT_TYPE_IS_MSIE|ACCEPT_TYPE_MSIE_IMAGE)) - { - return 1; - } - else if ((csp->accept_types - & (ACCEPT_TYPE_IS_MSIE|ACCEPT_TYPE_MSIE_IMAGE|ACCEPT_TYPE_MSIE_HTML)) - == (ACCEPT_TYPE_IS_MSIE|ACCEPT_TYPE_MSIE_HTML)) - { - return 0; - } -#endif - - return ((csp->action->flags & ACTION_IMAGE) != 0); - -} -#endif /* def IMAGE_BLOCKING */ - - -#ifdef TRUST_FILES -/********************************************************************* - * - * Function : is_untrusted_url - * - * Description : Should we "distrust" this URL (and block it)? - * - * Yes if it matches a line in the trustfile, or if the - * referrer matches a line starting with "+" in the - * trustfile. - * No otherwise. - * - * Parameters : - * 1 : csp = Current client state (buffers, headers, etc...) - * - * Returns : 0 => trusted, 1 => untrusted - * - *********************************************************************/ -int is_untrusted_url(struct client_state *csp) -{ - struct file_list *fl; - struct block_spec *b; - struct url_spec url[1], **tl, *t; - struct http_request rhttp[1]; - char *p, *h; - - /* - * If we don't have a trustlist, we trust everybody - */ - if (((fl = csp->tlist) == NULL) || ((b = fl->f) == NULL)) - { - return(0); - } - - - /* - * Do we trust the request URL itself? 
- */ - *url = dsplit(csp->http->host); - - /* if splitting the domain fails, punt */ - if (url->dbuf == NULL) return(0); - - memset(rhttp, '\0', sizeof(*rhttp)); - - for (b = b->next; b ; b = b->next) - { - if ((b->url->port == 0) || (b->url->port == csp->http->port)) - { - if ((b->url->domain[0] == '\0') || (domaincmp(b->url, url) == 0)) - { - if ((b->url->path == NULL) || -#ifdef REGEX - (regexec(b->url->preg, csp->http->path, 0, NULL, 0) == 0) -#else - (strncmp(b->url->path, csp->http->path, b->url->pathlen) == 0) -#endif - ) - { - freez(url->dbuf); - freez(url->dvec); - - if (b->reject == 0) return(0); - - return(1); - } - } - } - } - - freez(url->dbuf); - freez(url->dvec); - - if ((csp->referrer == NULL)|| (strlen(csp->referrer) <= 9)) - { - /* no referrer was supplied */ - return(1); - } - - /* forge a URL from the referrer so we can use - * convert_url() to parse it into its components. - */ - - p = NULL; - p = strsav(p, "GET "); - p = strsav(p, csp->referrer + 9); /* skip over "Referer: " */ - p = strsav(p, " HTTP/1.0"); - - parse_http_request(p, rhttp, csp); - freez(p); - - if (rhttp->cmd == NULL) - { - return(1); - } - - - /* - * If not, do we maybe trust its referrer? - */ - *url = dsplit(rhttp->host); - - /* if splitting the domain fails, punt */ - if (url->dbuf == NULL) return(1); - - for (tl = csp->config->trust_list; (t = *tl) ; tl++) - { - if ((t->port == 0) || (t->port == rhttp->port)) - { - if ((t->domain[0] == '\0') || domaincmp(t, url) == 0) - { - if ((t->path == NULL) || -#ifdef REGEX - (regexec(t->preg, rhttp->path, 0, NULL, 0) == 0) -#else - (strncmp(t->path, rhttp->path, t->pathlen) == 0) -#endif - ) - { - /* if the URL's referrer is from a trusted referrer, then - * add the target spec to the trustfile as an unblocked - * domain and return NULL (which means it's OK). 
- */ - - FILE *fp; - - freez(url->dbuf); - freez(url->dvec); - - if ((fp = fopen(csp->config->trustfile, "a"))) - { - h = NULL; - - h = strsav(h, "~"); - h = strsav(h, csp->http->hostport); - - p = csp->http->path; - if ((*p++ == '/') - && (*p++ == '~')) - { - /* since this path points into a user's home space - * be sure to include this spec in the trustfile. - */ - if ((p = strchr(p, '/'))) - { - *p = '\0'; - h = strsav(h, csp->http->path); /* FIXME: p?! */ - h = strsav(h, "/"); - } - } - - fprintf(fp, "%s\n", h); - freez(h); - fclose(fp); - } - return(0); - } - } - } - } - return(1); -} -#endif /* def TRUST_FILES */ - - -/********************************************************************* - * - * Function : pcrs_filter_response - * - * Description : Apply all the pcrs jobs from the joblist (re_filterfile) - * to the text buffer that's been accumulated in - * csp->iob->buf and set csp->content_length to the modified - * size. - * - * Parameters : - * 1 : csp = Current client state (buffers, headers, etc...) - * - * Returns : a pointer to the (newly allocated) modified buffer. - * or NULL in case something went wrong - * - *********************************************************************/ -char *pcrs_filter_response(struct client_state *csp) -{ - int hits=0; - int size = csp->iob->eod - csp->iob->cur; - - char *old = csp->iob->cur, *new = NULL; - pcrs_job *job; - - struct file_list *fl; - struct re_filterfile_spec *b; - - /* Sanity first ;-) */ - if (size <= 0) - { - return(NULL); - } - - if ( ( NULL == (fl = csp->rlist) ) || ( NULL == (b = fl->f) ) ) - { - log_error(LOG_LEVEL_ERROR, "Unable to get current state of regexp filtering."); - return(NULL); - } - - if ( NULL == b->joblist ) - { - log_error(LOG_LEVEL_RE_FILTER, "Empty joblist. 
Nothing to do."); - return(NULL); - } - - log_error(LOG_LEVEL_RE_FILTER, "re_filtering %s%s (size %d) ...", - csp->http->hostport, csp->http->path, size); - - /* Apply all jobs from the joblist */ - for (job = b->joblist; NULL != job; job = job->next) - { - hits += pcrs_execute(job, old, size, &new, &size); - if (old != csp->iob->cur) free(old); - old=new; - } - - log_error(LOG_LEVEL_RE_FILTER, " produced %d hits (new size %d).", hits, size); - - csp->content_length = size; - - /* fwiw, reset the iob */ - IOB_RESET(csp); - return(new); - -} - - -/********************************************************************* - * - * Function : gif_deanimate_response - * - * Description : Deanimate the GIF image that has been accumulated in - * csp->iob->buf and set csp->content_length to the modified - * size. - * - * Parameters : - * 1 : csp = Current client state (buffers, headers, etc...) - * - * Returns : a pointer to the (newly allocated) modified buffer. - * or NULL in case something went wrong. - * - *********************************************************************/ -char *gif_deanimate_response(struct client_state *csp) -{ - struct binbuffer *in, *out; - char *p; - int size = csp->iob->eod - csp->iob->cur; - - if ( (NULL == (in = (struct binbuffer *)zalloc(sizeof *in ))) - || (NULL == (out = (struct binbuffer *)zalloc(sizeof *out))) ) - { - log_error(LOG_LEVEL_DEANIMATE, "failed! (no mem)"); - return NULL; - } - - in->buffer = csp->iob->cur; - in->size = size; - - if (gif_deanimate(in, out, strncmp("last", csp->action->string[ACTION_STRING_DEANIMATE], 4))) - { - log_error(LOG_LEVEL_DEANIMATE, "failed! (gif parsing)"); - free(in); - buf_free(out); - return(NULL); - } - else - { - log_error(LOG_LEVEL_DEANIMATE, "Success! 
GIF shrunk from %d bytes to %d.", size, out->offset); - csp->content_length = out->offset; - p = out->buffer; - free(in); - free(out); - return(p); - } - -} - - -/********************************************************************* - * - * Function : url_actions - * - * Description : Gets the actions for this URL. - * - * Parameters : - * 1 : http = http_request request for blocked URLs - * 2 : csp = Current client state (buffers, headers, etc...) - * - * Returns : N/A - * - *********************************************************************/ -void url_actions(struct http_request *http, - struct client_state *csp) -{ - struct file_list *fl; - struct url_actions *b; - - init_current_action(csp->action); - - if (((fl = csp->actions_list) == NULL) || ((b = fl->f) == NULL)) - { - return; - } - - apply_url_actions(csp->action, http, b); - -} - - -/********************************************************************* - * - * Function : apply_url_actions - * - * Description : Applies a list of URL actions. - * - * Parameters : - * 1 : action = Destination. 
- * 2 : http = Current URL - * 3 : b = list of URL actions to apply - * - * Returns : N/A - * - *********************************************************************/ -void apply_url_actions(struct current_action_spec *action, - struct http_request *http, - struct url_actions *b) -{ - struct url_spec url[1]; - - if (b == NULL) - { - /* Should never happen */ - return; - } - - *url = dsplit(http->host); - - /* if splitting the domain fails, punt */ - if (url->dbuf == NULL) - { - return; - } - - for (b = b->next; NULL != b; b = b->next) - { - if ((b->url->port == 0) || (b->url->port == http->port)) - { - if ((b->url->domain[0] == '\0') || (domaincmp(b->url, url) == 0)) - { - if ((b->url->path == NULL) || -#ifdef REGEX - (regexec(b->url->preg, http->path, 0, NULL, 0) == 0) -#else - (strncmp(b->url->path, http->path, b->url->pathlen) == 0) -#endif - ) - { - merge_current_action(action, b->action); - } - } - } - } - - freez(url->dbuf); - freez(url->dvec); -} - - -/********************************************************************* - * - * Function : forward_url - * - * Description : Should we forward this to another proxy? - * - * Parameters : - * 1 : http = http_request request for current URL - * 2 : csp = Current client state (buffers, headers, etc...) - * - * Returns : Pointer to forwarding information. 
- * - *********************************************************************/ -const struct forward_spec * forward_url(struct http_request *http, - struct client_state *csp) -{ - static const struct forward_spec fwd_default[1] = { 0 }; /* All zeroes */ - struct forward_spec *fwd = csp->config->forward; - struct url_spec url[1]; - - if (fwd == NULL) - { - return(fwd_default); - } - - *url = dsplit(http->host); - - /* if splitting the domain fails, punt */ - if (url->dbuf == NULL) - { - return(fwd_default); - } - - while (fwd != NULL) - { - if ((fwd->url->port == 0) || (fwd->url->port == http->port)) - { - if ((fwd->url->domain[0] == '\0') || (domaincmp(fwd->url, url) == 0)) - { - if ((fwd->url->path == NULL) || -#ifdef REGEX - (regexec(fwd->url->preg, http->path, 0, NULL, 0) == 0) -#else - (strncmp(fwd->url->path, http->path, fwd->url->pathlen) == 0) -#endif - ) - { - freez(url->dbuf); - freez(url->dvec); - return(fwd); - } - } - } - fwd = fwd->next; - } - - freez(url->dbuf); - freez(url->dvec); - return(fwd_default); - -} - - -/********************************************************************* - * - * Function : dsplit - * - * Description : Takes a domain and returns a pointer to a url_spec - * structure populated with dbuf, dcnt and dvec. The - * other fields in the structure that is returned are zero. - * - * Parameters : - * 1 : domain = a URL address - * - * Returns : url_spec structure populated with dbuf, dcnt and dvec. 
- * - *********************************************************************/ -struct url_spec dsplit(char *domain) -{ - struct url_spec ret[1]; - char *v[BUFFER_SIZE]; - int size; - char *p; - - memset(ret, '\0', sizeof(*ret)); - - if (domain[strlen(domain) - 1] == '.') - { - ret->unanchored |= ANCHOR_RIGHT; - } - - if (domain[0] == '.') - { - ret->unanchored |= ANCHOR_LEFT; - } - - ret->dbuf = strdup(domain); - - /* map to lower case */ - for (p = ret->dbuf; *p ; p++) *p = tolower(*p); - - /* split the domain name into components */ - ret->dcnt = ssplit(ret->dbuf, ".", v, SZ(v), 1, 1); - - if (ret->dcnt <= 0) - { - memset(ret, '\0', sizeof(ret)); - return(*ret); - } - - /* save a copy of the pointers in dvec */ - size = ret->dcnt * sizeof(*ret->dvec); - - if ((ret->dvec = (char **)malloc(size))) - { - memcpy(ret->dvec, v, size); - } - - return(*ret); - -} - - -/********************************************************************* - * - * Function : simple_domaincmp - * - * Description : Domain-wise Compare fqdn's. The comparison is - * both left- and right-anchored. The individual - * domain names are compared with simplematch(). - * This is only used by domaincmp. - * - * Parameters : - * 1 : pv = array of patterns to compare - * 2 : fv = array of domain components to compare - * 3 : len = length of the arrays (both arrays are the - * same length - if they weren't, it couldn't - * possibly be a match). - * - * Returns : 0 => domains are equivalent, else no match. - * - *********************************************************************/ -static int simple_domaincmp(char **pv, char **fv, int len) -{ - int n; - - for (n = 0; n < len; n++) - { - if (simplematch(pv[n], fv[n])) - { - return 1; - } - } - - return 0; - -} - - -/********************************************************************* - * - * Function : domaincmp - * - * Description : Domain-wise Compare fqdn's. 
Governed by the bitmap in
 *                pattern->unanchored, the comparison is un-, left-,
 *                right-anchored, or both.
 *                The individual domain names are compared with
 *                simplematch().
 *
 * Parameters  :
 *          1  :  pattern = a domain that may contain a '*' as a wildcard.
 *          2  :  fqdn = domain name against which the patterns are compared.
 *
 * Returns     :  0 => domains are equivalent, else no match.
 *
 *********************************************************************/
int domaincmp(struct url_spec *pattern, struct url_spec *fqdn)
{
   const int plen = pattern->dcnt;
   const int flen = fqdn->dcnt;
   char **pv;
   char **fv;
   int offset;

   /* A name with fewer components than the pattern can never match. */
   if (flen < plen)
   {
      return 1;
   }

   pv = pattern->dvec;
   fv = fqdn->dvec;

   switch (pattern->unanchored & (ANCHOR_RIGHT | ANCHOR_LEFT))
   {
      case 0:
         /* Fully anchored: the component counts must be equal. */
         if (flen != plen)
         {
            return 1;
         }
         return simple_domaincmp(pv, fv, plen);

      case ANCHOR_LEFT:
         /*
          * Unanchored on the left only, i.e. right anchored:
          * compare against the last plen components of the name.
          */
         return simple_domaincmp(pv, fv + (flen - plen), plen);

      case ANCHOR_RIGHT:
         /*
          * Unanchored on the right only, i.e. left anchored:
          * surplus trailing components of the name are ignored.
          */
         return simple_domaincmp(pv, fv, plen);

      default:
         /*
          * Unanchored on both sides: slide the pattern over the
          * name and accept the first position that matches.
          */
         for (offset = 0; offset <= flen - plen; offset++)
         {
            if (simple_domaincmp(pv, fv + offset, plen) == 0)
            {
               return 0;
            }
         }
         return 1;
   }

}


/*
  Local Variables:
  tab-width: 3
  end:
*/