X-Git-Url: http://www.privoxy.org/gitweb/?p=privoxy.git;a=blobdiff_plain;f=filters.c;h=e5497c2e353ffc02e21ce60b23138a3018d14581;hp=44a884f538dca8af752482aa04dd9b2920d974c6;hb=4793e1be2b24df19d6aa42a84b95653d040054f9;hpb=ffc1ab733579543abf77003e7d4b1a373d81c7a3 diff --git a/filters.c b/filters.c index 44a884f5..e5497c2e 100644 --- a/filters.c +++ b/filters.c @@ -1,4 +1,4 @@ -const char filters_rcs[] = "$Id: filters.c,v 1.8 2001/05/26 17:13:28 jongfoster Exp $"; +const char filters_rcs[] = "$Id: filters.c,v 1.10 2001/05/29 09:50:24 jongfoster Exp $"; /********************************************************************* * * File : $Source: /cvsroot/ijbswa/current/filters.c,v $ @@ -38,6 +38,42 @@ const char filters_rcs[] = "$Id: filters.c,v 1.8 2001/05/26 17:13:28 jongfoster * * Revisions : * $Log: filters.c,v $ + * Revision 1.10 2001/05/29 09:50:24 jongfoster + * Unified blocklist/imagelist/permissionslist. + * File format is still under discussion, but the internal changes + * are (mostly) done. + * + * Also modified interceptor behaviour: + * - We now intercept all URLs beginning with one of the following + * prefixes (and *only* these prefixes): + * * http://i.j.b/ + * * http://ijbswa.sf.net/config/ + * * http://ijbswa.sourceforge.net/config/ + * - New interceptors "home page" - go to http://i.j.b/ to see it. + * - Internal changes so that intercepted and fast redirect pages + * are not replaced with an image. + * - Interceptors now have the option to send a binary page direct + * to the client. (i.e. ijb-send-banner uses this) + * - Implemented show-url-info interceptor. (Which is why I needed + * the above interceptors changes - a typical URL is + * "http://i.j.b/show-url-info?url=www.somesite.com/banner.gif". + * The previous mechanism would not have intercepted that, and + * if it had been intercepted then it then it would have replaced + * it with an image.) + * + * Revision 1.9 2001/05/27 22:17:04 oes + * + * - re_process_buffer no longer writes the modified buffer + * to the client, which was very ugly. It now returns the + * buffer, which it is then written by chat. + * + * - content_length now adjusts the Content-Length: header + * for modified documents rather than crunch()ing it. + * (Length info in csp->content_length, which is 0 for + * unmodified documents) + * + * - For this to work, sed() is called twice when filtering. + * * Revision 1.8 2001/05/26 17:13:28 jongfoster * Filled in a function comment. * @@ -148,6 +184,7 @@ const char filters_rcs[] = "$Id: filters.c,v 1.8 2001/05/26 17:13:28 jongfoster #include "jbsockets.h" #include "errlog.h" #include "jbsockets.h" +#include "miscutil.h" #ifdef _WIN32 #include "win32.h" @@ -183,13 +220,13 @@ static const char CBLOCK[] = "

" BANNER "

\n" - "

Your request for %s%s
\n" - "was blocked because it matches the following pattern " - "in the blockfile: %s\n

" + "

Your request for %s%s\n" + "was blocked.
See why" #ifdef FORCE_LOAD - "

Go there anyway.

" + " or " + "go there anyway." #endif /* def FORCE_LOAD */ + "

\n" "\n" "\n"; @@ -210,7 +247,7 @@ static const char CTRUST[] = "\n" WHITEBG "
" - "" + "" BANNER "" "
" @@ -373,71 +410,39 @@ int acl_addr(char *aspec, struct access_control_addr *aca) *********************************************************************/ char *block_url(struct http_request *http, struct client_state *csp) { - struct file_list *fl; - struct block_spec *b; - struct url_spec url[1]; char *p; int n; + int factor = 2; - if (((fl = csp->blist) == NULL) || ((b = fl->f) == NULL)) + if ((csp->permissions & PERMIT_BLOCK) == 0) { return(NULL); } - - *url = dsplit(http->host); - - /* if splitting the domain fails, punt */ - if (url->dbuf == NULL) return(NULL); - - for (b = b->next; b ; b = b->next) + else { - if ((b->url->port == 0) || (b->url->port == http->port)) - { - if ((b->url->domain[0] == '\0') || (domaincmp(b->url, url) == 0)) - { - if ((b->url->path == NULL) || -#ifdef REGEX - (regexec(b->url->preg, http->path, 0, NULL, 0) == 0) -#else - (strncmp(b->url->path, http->path, b->url->pathlen) == 0) -#endif - ) - { - freez(url->dbuf); - freez(url->dvec); - - if (b->reject == 0) return(NULL); - - n = strlen(CBLOCK); - n += strlen(http->hostport); - n += strlen(http->path); - n += strlen(b->url->spec); #ifdef FORCE_LOAD - n += strlen(http->hostport); - n += strlen(http->path); + factor++; #endif /* def FORCE_LOAD */ - p = (char *)malloc(n); + n = strlen(CBLOCK); + n += factor * strlen(http->hostport); + n += factor * strlen(http->path); + + p = (char *)malloc(n); #ifdef FORCE_LOAD - sprintf(p, CBLOCK, http->hostport, http->path, b->url->spec, http->hostport, http->path); + sprintf(p, CBLOCK, http->hostport, http->path, http->hostport, http->path, + http->hostport, http->path); #else - sprintf(p, CBLOCK, http->hostport, http->path, b->url->spec); + sprintf(p, CBLOCK, http->hostport, http->path, http->hostport, http->path); #endif /* def FORCE_LOAD */ - return(p); - } - } - } + return(p); } - freez(url->dbuf); - freez(url->dvec); - return(NULL); - } -#if defined(DETECT_MSIE_IMAGES) || defined(USE_IMAGE_LIST) +#ifdef IMAGE_BLOCKING /********************************************************************* * * Function : block_imageurl @@ -470,81 +475,9 @@ int block_imageurl(struct http_request *http, struct client_state *csp) } #endif -#if defined(USE_IMAGE_LIST) - return block_imageurl_using_imagelist(http, csp); -#else - /* Don't know - assume HTML */ - return 0; -#endif -} -#endif /* defined(DETECT_MSIE_IMAGES) || defined(USE_IMAGE_LIST) */ - - -#ifdef USE_IMAGE_LIST -/********************************************************************* - * - * Function : block_imageurl - * - * Description : Test if a URL is in the imagelist. - * - * Parameters : - * 1 : http = URL to check. - * 2 : csp = Current client state (buffers, headers, etc...) - * - * Returns : True (nonzero) if URL is in image list, false (0) - * otherwise - * - *********************************************************************/ -int block_imageurl_using_imagelist(struct http_request *http, struct client_state *csp) -{ - struct file_list *fl; - struct block_spec *b; - struct url_spec url[1]; - - if (((fl = csp->ilist) == NULL) || ((b = fl->f) == NULL)) - { - return(0); - } - - *url = dsplit(http->host); - - /* if splitting the domain fails, punt */ - if (url->dbuf == NULL) return(0); - - for (b = b->next; b ; b = b->next) - { - - if ((b->url->port == 0) || (b->url->port == http->port)) - { - /* port matches, check domain */ - if ((b->url->domain[0] == '\0') || (domaincmp(b->url, url) == 0)) - { - /* domain matches, check path */ - if ((b->url->path == NULL) || -#ifdef REGEX - (regexec(b->url->preg, http->path, 0, NULL, 0) == 0) -#else - (strncmp(b->url->path, http->path, b->url->pathlen) == 0) -#endif - ) - { - /* Matches */ - freez(url->dbuf); - freez(url->dvec); - - if (b->reject == 0) return(0); - - return(1); - } - } - } - } - freez(url->dbuf); - freez(url->dvec); - return(0); - + return ((csp->permissions & PERMIT_IMAGE) != 0); } -#endif /* def USE_IMAGE_LIST */ +#endif /* def IMAGE_BLOCKING */ #ifdef PCRS @@ -576,13 +509,13 @@ char *re_process_buffer(struct client_state *csp) /* Sanity first ;-) */ if (size <= 0) { - return; + return(strdup("")); } if ( ( NULL == (fl = csp->rlist) ) || ( NULL == (b = fl->f) ) ) { log_error(LOG_LEVEL_ERROR, "Unable to get current state of regexp filtering."); - return; + return(strdup("")); } joblist = b->joblist; @@ -821,6 +754,26 @@ trust_url_not_trusted: #endif /* def TRUST_FILES */ +static const char C_HOME_PAGE[] = + "HTTP/1.0 200 OK\n" + "Pragma: no-cache\n" + "Expires: Thu Jul 31, 1997 07:42:22 pm GMT\n" + "Content-Type: text/html\n\n" + "\n" + "\n" + "Internet Junkbuster: Information\n" + "\n" + BODY + "

" + BANNER + "

\n" + "

JunkBuster web site

\n" + "

Proxy configuration

\n" + "

Look up a URL

\n" + "\n" + "\n"; + + /********************************************************************* * * Function : intercept_url @@ -836,33 +789,71 @@ trust_url_not_trusted: * 1 : http = http_request request, check `basename's of blocklist * 2 : csp = Current client state (buffers, headers, etc...) * - * Returns : NULL for no recognized URLs, or an HTML description page. + * Returns : 1 if it intercepts & handles the request. * *********************************************************************/ -char *intercept_url(struct http_request *http, struct client_state *csp) +int intercept_url(struct http_request *http, struct client_state *csp) { - char *basename; + char *basename = NULL; const struct interceptors *v; - basename = strrchr(http->path, '/'); + if (0 == strcmpic(http->host,"i.j.b")) + { + /* + * Catch http://i.j.b/... + */ + basename = http->path; + } + else if ( ( (0 == strcmpic(http->host,"ijbswa.sourceforge.net")) + || (0 == strcmpic(http->host,"ijbswa.sf.net")) ) + && (0 == strncmpic(http->path,"/config", 7)) + && ((http->path[7] == '/') || (http->path[7] == '\0'))) + { + /* + * Catch http://ijbswa.sourceforge.net/config/... + * and http://ijbswa.sf.net/config/... + */ + basename = http->path + 7; + } + + if (!basename) + { + /* Don't want to intercept */ + return(0); + } - if (basename == NULL) return(NULL); + /* We have intercepted it. */ - basename ++; /* first char past the last slash */ + /* remove any leading slash */ + if (*basename == '/') + { + basename++; + } + + log_error(LOG_LEVEL_GPC, "%s%s intercepted!", http->hostport, http->path); + log_error(LOG_LEVEL_CLF, "%s - - [%T] \"%s\" 200 3", + csp->ip_addr_str, http->cmd); - if (*basename) + for (v = intercept_patterns; v->str; v++) { - for (v = intercept_patterns; v->str; v++) + if (strncmp(basename, v->str, v->len) == 0) { - if (strncmp(basename, v->str, v->len) == 0) + char * p = ((v->interceptor)(http, csp)); + + if (p != NULL) { - return((v->interceptor)(http, csp)); + /* Send HTML redirection result */ + write_socket(csp->cfd, p, strlen(p)); + + freez(p); } + return(1); } } - return(NULL); + write_socket(csp->cfd, C_HOME_PAGE, strlen(C_HOME_PAGE)); + return(1); } #ifdef FAST_REDIRECTS @@ -929,10 +920,11 @@ int url_permissions(struct http_request *http, struct client_state *csp) struct file_list *fl; struct permissions_spec *b; struct url_spec url[1]; + int permissions = csp->config->default_permissions; if (((fl = csp->permissions_list) == NULL) || ((b = fl->f) == NULL)) { - return(csp->config->default_permissions); + return(permissions); } *url = dsplit(http->host); @@ -940,7 +932,7 @@ int url_permissions(struct http_request *http, struct client_state *csp) /* if splitting the domain fails, punt */ if (url->dbuf == NULL) { - return(csp->config->default_permissions); + return(permissions); } for (b = b->next; NULL != b; b = b->next) @@ -957,9 +949,8 @@ int url_permissions(struct http_request *http, struct client_state *csp) #endif ) { - freez(url->dbuf); - freez(url->dvec); - return(b->permissions); + permissions &= b->mask; + permissions |= b->add; } } } @@ -967,7 +958,7 @@ int url_permissions(struct http_request *http, struct client_state *csp) freez(url->dbuf); freez(url->dvec); - return(csp->config->default_permissions); + return(permissions); } @@ -1175,13 +1166,6 @@ char *show_proxy_args(struct http_request *http, struct client_state *csp) } switch (which_file) { - case 'b': - if (csp->blist) - { - filename = csp->blist->filename; - file_description = "Block List"; - } - break; case 'p': if (csp->permissions_list) { @@ -1207,16 +1191,6 @@ char *show_proxy_args(struct http_request *http, struct client_state *csp) break; #endif /* def ACL_FILES */ -#ifdef USE_IMAGE_LIST - case 'i': - if (csp->ilist) - { - filename = csp->ilist->filename; - file_description = "Image List"; - } - break; -#endif /* def USE_IMAGE_LIST */ - #ifdef PCRS case 'r': if (csp->rlist) @@ -1311,12 +1285,12 @@ char *show_proxy_args(struct http_request *http, struct client_state *csp) } #endif /* def SPLIT_PROXY_ARGS */ - s = strsav(s, csp->config->proxy_args->header); - s = strsav(s, csp->config->proxy_args->invocation); + s = strsav(s, csp->config->proxy_args_header); + s = strsav(s, csp->config->proxy_args_invocation); #ifdef STATISTICS s = add_stats(s); #endif /* def STATISTICS */ - s = strsav(s, csp->config->proxy_args->gateways); + s = strsav(s, csp->config->proxy_args_gateways); #ifdef SPLIT_PROXY_ARGS s = strsav(s, @@ -1324,13 +1298,6 @@ char *show_proxy_args(struct http_request *http, struct client_state *csp) "

(Click a filename to view it)

\n" ""); #else /* ifndef SPLIT_PROXY_ARGS */ - if (csp->blist) - { - s = strsav(s, csp->blist->proxy_args); - } - if (csp->clist) { s = strsav(s, csp->clist->proxy_args); @@ -1406,13 +1359,6 @@ char *show_proxy_args(struct http_request *http, struct client_state *csp) } #endif /* def ACL_FILES */ -#ifdef USE_IMAGE_LIST - if (csp->ilist) - { - s = strsav(s, csp->ilist->proxy_args); - } -#endif /* def USE_IMAGE_LIST */ - #ifdef PCRS if (csp->rlist) { @@ -1429,33 +1375,250 @@ char *show_proxy_args(struct http_request *http, struct client_state *csp) #endif /* ndef SPLIT_PROXY_ARGS */ - s = strsav(s, csp->config->proxy_args->trailer); + s = strsav(s, csp->config->proxy_args_trailer); return(s); } +static const char C_URL_INFO_HEADER[] = + "HTTP/1.0 200 OK\n" + "Pragma: no-cache\n" + "Expires: Thu Jul 31, 1997 07:42:22 pm GMT\n" + "Content-Type: text/html\n\n" + "\n" + "\n" + "Internet Junkbuster: URL Info\n" + "\n" + BODY + "

" + BANNER + "

\n" + "

Information for: http://%s

\n"; +static const char C_URL_INFO_FOOTER[] = + "\n

\n" + "\n" + "\n"; + +static const char C_URL_INFO_FORM[] = + "HTTP/1.0 200 OK\n" + "Pragma: no-cache\n" + "Expires: Thu Jul 31, 1997 07:42:22 pm GMT\n" + "Content-Type: text/html\n\n" + "\n" + "\n" + "Internet Junkbuster: URL Info\n" + "\n" + BODY + "

" + BANNER + "

\n" + "
\n" + "

Please enter a URL, without the leading "http://":

" + "

" + "

\n" + "
\n" + "\n" + "\n"; + + +/********************************************************************* + * + * Function : permissions_to_text + * + * Description : Converts a permissionsfil entry from numeric form + * ("mask" and "add") to text. + * + * Parameters : + * 1 : mask = As from struct permissions_spec + * 2 : add = As from struct permissions_spec + * + * Returns : A string. Caller must free it. + * + *********************************************************************/ +char * permissions_to_text(unsigned mask, unsigned add) +{ + char * result = strdup(""); + + /* sanity - prevents "-feature +feature" */ + mask |= add; + +#define PERMISSION_TO_TEXT(__bit, __name) \ + if (!(mask & __bit)) \ + { \ + result = strsav(result, " -" __name); \ + } \ + else if (add & __bit) \ + { \ + result = strsav(result, " +" __name); \ + } + + PERMISSION_TO_TEXT(PERMIT_COOKIE_SET, "cookies-set"); + PERMISSION_TO_TEXT(PERMIT_COOKIE_READ, "cookies-read"); + PERMISSION_TO_TEXT(PERMIT_RE_FILTER, "filter"); + PERMISSION_TO_TEXT(PERMIT_POPUPS, "popup"); + PERMISSION_TO_TEXT(PERMIT_REFERER, "referer"); + PERMISSION_TO_TEXT(PERMIT_FAST_REDIRECTS, "fast-redirects"); + PERMISSION_TO_TEXT(PERMIT_BLOCK, "block"); + PERMISSION_TO_TEXT(PERMIT_IMAGE, "image"); + + return result; +} + + + /********************************************************************* + * + * Function : ijb_show_url_info + * + * Description : (please fill me in) + * + * Parameters : + * 1 : http = http_request request for crunched URL + * 2 : csp = Current client state (buffers, headers, etc...) + * + * Returns : ???FIXME + * + *********************************************************************/ +char *ijb_show_url_info(struct http_request *http, struct client_state *csp) +{ + char * query_string = strchr(http->path, '?'); + char * host = NULL; + + if (query_string != NULL) + { + query_string = url_decode(query_string + 1); + if (strncmpic(query_string, "url=", 4) == 0) + { + host = strdup(query_string + 4); + } + freez(query_string); + } + if (host != NULL) + { + char * result; + char * path; + char * s; + int port = 80; + struct file_list *fl; + struct permissions_spec *b; + struct url_spec url[1]; + int permissions = csp->config->default_permissions; + + result = (char *)malloc(sizeof(C_URL_INFO_HEADER) + 2 * strlen(host)); + sprintf(result, C_URL_INFO_HEADER, host, host); + + s = permissions_to_text(permissions, permissions); + result = strsav(result, "

Defaults:

\n

{"); + result = strsav(result, s); + result = strsav(result, " }

\n

Patterns affecting the URL:

\n

\n"); + freez(s); + + s = strchr(host, '/'); + if (s != NULL) + { + path = strdup(s); + *s = '\0'; + } + else + { + path = strdup(""); + } + s = strchr(host, ':'); + if (s != NULL) + { + *s++ = '\0'; + port = atoi(s); + } + + if (((fl = csp->permissions_list) == NULL) || ((b = fl->f) == NULL)) + { + freez(host); + freez(path); + result = strsav(result, C_URL_INFO_FOOTER); + return result; + } + + *url = dsplit(host); + + /* if splitting the domain fails, punt */ + if (url->dbuf == NULL) + { + freez(host); + freez(path); + result = strsav(result, C_URL_INFO_FOOTER); + return result; + } + + for (b = b->next; NULL != b; b = b->next) + { + if ((b->url->port == 0) || (b->url->port == port)) + { + if ((b->url->domain[0] == '\0') || (domaincmp(b->url, url) == 0)) + { + if ((b->url->path == NULL) || +#ifdef REGEX + (regexec(b->url->preg, path, 0, NULL, 0) == 0) +#else + (strncmp(b->url->path, path, b->url->pathlen) == 0) +#endif + ) + { + s = permissions_to_text(b->mask, b->add); + result = strsav(result, "{"); + result = strsav(result, s); + result = strsav(result, " }
\n"); + result = strsav(result, b->url->spec); + result = strsav(result, "
\n
\n"); + freez(s); + permissions &= b->mask; + permissions |= b->add; + } + } + } + } + + freez(url->dbuf); + freez(url->dvec); + + freez(host); + freez(path); + + s = permissions_to_text(permissions, permissions); + result = strsav(result, "

\n

Final Results:

\n

{"); + result = strsav(result, s); + result = strsav(result, " }
\n
\n"); + freez(s); + + result = strsav(result, C_URL_INFO_FOOTER); + return result; + } + else + { + return strdup(C_URL_INFO_FORM); + } +} + + /********************************************************************* * * Function : ijb_send_banner * - * Description : This "crunch"es "http:/any.thing/ijb-send-banner and - * thus triggers sending the image in jcc.c:chat. - * For the unlikely case, that the imagefile/MSIE - * mechanism is not used, or tinygif = 0, a page - * describing the reson of the interception is generated. + * Description : This "crunch"es "http://i.j.b/ijb-send-banner and + * sends the image. * * Parameters : * 1 : http = http_request request for crunched URL * 2 : csp = Current client state (buffers, headers, etc...) * - * Returns : A string that contains why this was intercepted. + * Returns : NULL, indicating that it has already sent the data. * *********************************************************************/ char *ijb_send_banner(struct http_request *http, struct client_state *csp) { - return(strdup(CNOBANNER)); + write_socket(csp->cfd, JBGIF, sizeof(JBGIF)-1); + + return(NULL); } #ifdef TRUST_FILES