1 const char filters_rcs[] = "$Id: filters.c,v 1.12 2001/05/31 17:35:20 oes Exp $";
2 /*********************************************************************
4 * File : $Source: /cvsroot/ijbswa/current/filters.c,v $
6 * Purpose : Declares functions to parse/crunch headers and pages.
7 * Functions declared include:
8 * `acl_addr', `add_stats', `block_acl', `block_imageurl',
9 * `block_url', `url_actions', `domaincmp', `dsplit',
10 * `filter_popups', `forward_url', 'redirect_url',
11 * `ij_untrusted_url', `intercept_url', `re_process_buffer',
12 * `show_proxy_args', 'ijb_send_banner', and `trust_url'
14 * Copyright : Written by and Copyright (C) 2001 the SourceForge
15 * IJBSWA team. http://ijbswa.sourceforge.net
17 * Based on the Internet Junkbuster originally written
18 * by and Copyright (C) 1997 Anonymous Coders and
19 * Junkbusters Corporation. http://www.junkbusters.com
21 * This program is free software; you can redistribute it
22 * and/or modify it under the terms of the GNU General
23 * Public License as published by the Free Software
24 * Foundation; either version 2 of the License, or (at
25 * your option) any later version.
27 * This program is distributed in the hope that it will
28 * be useful, but WITHOUT ANY WARRANTY; without even the
29 * implied warranty of MERCHANTABILITY or FITNESS FOR A
30 * PARTICULAR PURPOSE. See the GNU General Public
31 * License for more details.
33 * The GNU General Public License should be included with
34 * this file. If not, you can view it at
35 * http://www.gnu.org/copyleft/gpl.html
36 * or write to the Free Software Foundation, Inc., 59
37 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
41 * Revision 1.12 2001/05/31 17:35:20 oes
43 * - Enhanced domain part globbing with infix and prefix asterisk
44 * matching and optional unanchored operation
46 * Revision 1.11 2001/05/29 11:53:23 oes
47 * "See why" link added to "blocked" page
49 * Revision 1.10 2001/05/29 09:50:24 jongfoster
50 * Unified blocklist/imagelist/permissionslist.
51 * File format is still under discussion, but the internal changes
54 * Also modified interceptor behaviour:
55 * - We now intercept all URLs beginning with one of the following
56 * prefixes (and *only* these prefixes):
58 * * http://ijbswa.sf.net/config/
59 * * http://ijbswa.sourceforge.net/config/
60 * - New interceptors "home page" - go to http://i.j.b/ to see it.
61 * - Internal changes so that intercepted and fast redirect pages
62 * are not replaced with an image.
63 * - Interceptors now have the option to send a binary page direct
64 * to the client. (i.e. ijb-send-banner uses this)
65 * - Implemented show-url-info interceptor. (Which is why I needed
66 * the above interceptors changes - a typical URL is
67 * "http://i.j.b/show-url-info?url=www.somesite.com/banner.gif".
68 * The previous mechanism would not have intercepted that, and
69 * if it had been intercepted then it would have replaced
72 * Revision 1.9 2001/05/27 22:17:04 oes
74 * - re_process_buffer no longer writes the modified buffer
75 * to the client, which was very ugly. It now returns the
76 * buffer, which is then written by chat.
78 * - content_length now adjusts the Content-Length: header
79 * for modified documents rather than crunch()ing it.
80 * (Length info in csp->content_length, which is 0 for
81 * unmodified documents)
83 * - For this to work, sed() is called twice when filtering.
85 * Revision 1.8 2001/05/26 17:13:28 jongfoster
86 * Filled in a function comment.
88 * Revision 1.7 2001/05/26 15:26:15 jongfoster
89 * ACL feature now provides more security by immediately dropping
90 * connections from untrusted hosts.
92 * Revision 1.6 2001/05/26 00:28:36 jongfoster
93 * Automatic reloading of config file.
94 * Removed obsolete SIGHUP support (Unix) and Reload menu option (Win32).
95 * Most of the global variables have been moved to a new
96 * struct configuration_spec, accessed through csp->config->globalname
97 * Most of the globals remaining are used by the Win32 GUI.
99 * Revision 1.5 2001/05/25 22:34:30 jongfoster
102 * Revision 1.4 2001/05/22 18:46:04 oes
104 * - Enabled filtering banners by size rather than URL
105 * by adding patterns that replace all standard banner
106 * sizes with the "Junkbuster" gif to the re_filterfile
108 * - Enabled filtering WebBugs by providing a pattern
109 * which kills all 1x1 images
111 * - Added support for PCRE_UNGREEDY behaviour to pcrs,
112 * which is selected by the (nonstandard and therefore
113 * capital) letter 'U' in the option string.
114 * It causes the quantifiers to be ungreedy by default.
115 * Appending a ? turns back to greedy (!).
117 * - Added a new interceptor ijb-send-banner, which
118 * sends back the "Junkbuster" gif. Without imagelist or
119 * MSIE detection support, or if tinygif = 1, or the
120 * URL isn't recognized as an imageurl, a lame HTML
121 * explanation is sent instead.
123 * - Added new feature, which permits blocking remote
124 * script redirects and firing back a local redirect
126 * The feature is conditionally compiled, i.e. it
127 * can be disabled with --disable-fast-redirects,
128 * plus it must be activated by a "fast-redirects"
129 * line in the config file, has its own log level
130 * and of course wants to be displayed by show-proxy-args
131 * Note: Boy, all the #ifdefs in 1001 locations and
132 * all the fumbling with configure.in and acconfig.h
133 * were *way* more work than the feature itself :-(
135 * - Because a generic redirect template was needed for
136 * this, tinygif = 3 now uses the same.
138 * - Moved GIFs, and other static HTTP response templates
143 * - Removed some >400 CRs again (Jon, you really worked
146 * Revision 1.3 2001/05/20 16:44:47 jongfoster
147 * Removing last hardcoded JunkBusters.com URLs.
149 * Revision 1.2 2001/05/20 01:21:20 jongfoster
150 * Version 2.9.4 checkin.
151 * - Merged popupfile and cookiefile, and added control over PCRS
152 * filtering, in new "permissionsfile".
153 * - Implemented LOG_LEVEL_FATAL, so that if there is a configuration
154 * file error you now get a message box (in the Win32 GUI) rather
155 * than the program exiting with no explanation.
156 * - Made killpopup use the PCRS MIME-type checking and HTTP-header
158 * - Removed tabs from "config"
159 * - Moved duplicated url parsing code in "loaders.c" to a new function.
160 * - Bumped up version number.
162 * Revision 1.1.1.1 2001/05/15 13:58:52 oes
163 * Initial import of version 2.9.3 source tree
166 *********************************************************************/
172 #include <sys/types.h>
179 #include <netinet/in.h>
181 #include <winsock2.h>
188 #include "showargs.h"
192 #include "jbsockets.h"
194 #include "jbsockets.h"
195 #include "miscutil.h"
202 const char filters_h_rcs[] = FILTERS_H_VERSION;
204 /* Fix a problem with Solaris. There should be no effect on other
206 * Solaris's isspace() is a macro which uses its argument directly
207 * as an array index. Therefore we need to make sure that high-bit
208 * characters generate +ve values, and ideally we also want to make
209 * the argument match the declared parameter type of "int".
211 #define ijb_isdigit(__X) isdigit((int)(unsigned char)(__X))
214 static const char CBLOCK[] =
216 "HTTP/1.0 403 Request for blocked URL\n"
217 #else /* ifndef AMIGA */
218 "HTTP/1.0 202 Request for blocked URL\n"
219 #endif /* ndef AMIGA */
/* CBLOCK is a printf-style template for the blocked-URL response.
 * Each %s%s pair below takes a hostport string followed by a path string:
 * one pair for the displayed URL, one for the show-url-info link, and a
 * third pair in the section closed by the FORCE_LOAD #endif.
 * block_url() supplies http->hostport and http->path for every pair. */
221 "Last-Modified: Thu Jul 31, 1997 07:42:22 pm GMT\n"
222 "Expires: Thu Jul 31, 1997 07:42:22 pm GMT\n"
223 "Content-Type: text/html\n\n"
226 "<title>Internet Junkbuster: Request for blocked URL</title>\n"
232 "<p align=center>Your request for <b>%s%s</b>\n"
233 "was blocked.<br><a href=\"http://i.j.b/show-url-info?url=%s%s\">See why</a>"
235 " or <a href=\"http://%s" FORCE_PREFIX "%s\">"
236 "go there anyway.</a>"
237 #endif /* def FORCE_LOAD */
/* CTRUST is a printf-style template for the untrusted-URL response.
 * The single link below carries three %s placeholders joined by plus
 * signs; trust_url() fills them with the url_encode()d hostport, path
 * and referrer, in that order. */
243 static const char CTRUST[] =
245 "HTTP/1.0 403 Request for untrusted URL\n"
246 #else /* ifndef AMIGA */
247 "HTTP/1.0 202 Request for untrusted URL\n"
248 #endif /* ndef AMIGA */
250 "Last-Modified: Thu Jul 31, 1997 07:42:22 pm GMT\n"
251 "Expires: Thu Jul 31, 1997 07:42:22 pm GMT\n"
252 "Content-Type: text/html\n\n"
255 "<title>Internet Junkbuster: Request for untrusted URL</title>\n"
259 "<a href=http://i.j.b/ij-untrusted-url?%s+%s+%s>"
265 #endif /* def TRUST_FILES */
269 /*********************************************************************
271 * Function : block_acl
273 * Description : Block this request?
274 * Decide yes or no based on ACL file.
277 * 1 : dst = The proxy or gateway address this is going to.
278 * Or NULL to check all possible targets.
279 * 2 : csp = Current client state (buffers, headers, etc...)
280 * Also includes the client IP address.
282 * Returns : 0 = FALSE (don't block) and 1 = TRUE (do block)
284 *********************************************************************/
/* Walks the access control list attached to csp->alist and compares the
 * client address (and, when dst is given, the destination) against each
 * entry. */
285 int block_acl(struct access_control_addr *dst,
286 struct client_state *csp)
288 struct file_list *fl;
289 struct access_control_list *a, *acl;
291 /* if not using an access control list, then permit the connection */
292 if (((fl = csp->alist) == NULL) ||
293 ((acl = (struct access_control_list *) fl->f) == NULL))
298 /* search the list */
/* Iteration starts at acl->next, so the first node is never examined
 * (presumably a list-head placeholder - TODO confirm). */
299 for (a = acl->next ; a ; a = a->next)
/* Source test: client address masked with the entry source mask. */
301 if ((csp->ip_addr_long & a->src->mask) == a->src->addr)
305 /* Just want to check if they have any access */
306 if (a->action == ACL_PERMIT)
/* Destination test: masked address must equal the entry address, and the
 * port must either match or the entry port must be 0 (any port). */
311 else if ( ((dst->addr & a->dst->mask) == a->dst->addr)
312 && ((dst->port == a->dst->port) || (a->dst->port == 0)))
314 if (a->action == ACL_PERMIT)
331 /*********************************************************************
333 * Function : acl_addr
335 * Description : Called from `load_aclfile' to parse an ACL address.
338 * 1 : aspec = String specifying ACL address.
339 * 2 : aca = struct access_control_addr to fill in.
341 * Returns : 0 => Ok, everything else is an error.
343 *********************************************************************/
/* Parses an ACL address specification into aca: an optional mask length
 * after a slash, an optional port after a colon, and a host part that is
 * resolved to an address. */
344 int acl_addr(char *aspec, struct access_control_addr *aca)
346 int i, masklength, port;
/* Optional mask length: located by the slash, must start with a digit. */
352 if ((p = strchr(aspec, '/')))
356 if (ijb_isdigit(*p) == 0)
360 masklength = atoi(p);
/* Only mask lengths 0 through 32 pass this range test. */
363 if ((masklength < 0) || (masklength > 32))
/* Optional port: located by the colon, must start with a digit. */
368 if ((p = strchr(aspec, ':')))
372 if (ijb_isdigit(*p) == 0)
/* The resolver result is converted from network to host byte order. */
381 aca->addr = ntohl(resolve_hostname_to_ip(aspec));
385 log_error(LOG_LEVEL_ERROR, "can't resolve address for %s", aspec);
389 /* build the netmask */
/* Sets the top masklength bits, one per iteration.
 * NOTE(review): for i == 1 the int constant 1 is shifted left by 31,
 * into the sign bit where int is 32 bits wide; an unsigned constant
 * would avoid the undefined signed shift - confirm and fix. */
391 for (i=1; i <= masklength ; i++)
393 aca->mask |= (1 << (32 - i));
396 /* now mask off the host portion of the ip address
397 * (i.e. save on the network portion of the address).
399 aca->addr = aca->addr & aca->mask;
404 #endif /* def ACL_FILES */
407 /*********************************************************************
409 * Function : block_url
411 * Description : Called from `chat'. Check to see if we need to block this.
414 * 1 : http = http_request request to "check" for blocked
415 * 2 : csp = Current client state (buffers, headers, etc...)
417 * Returns : NULL => unblocked, else string to HTML block description.
419 *********************************************************************/
/* Builds the blocked-URL page from the CBLOCK template for requests whose
 * current action has ACTION_BLOCK set. */
420 char *block_url(struct http_request *http, struct client_state *csp)
/* Tests for the case where ACTION_BLOCK is not set. */
426 if ((csp->action->flags & ACTION_BLOCK) == 0)
434 #endif /* def FORCE_LOAD */
/* The buffer size grows by the hostport and path lengths, each scaled by
 * factor (assigned on lines not visible here; presumably the number of
 * %s pairs in CBLOCK - TODO confirm). */
437 n += factor * strlen(http->hostport);
438 n += factor * strlen(http->path);
440 p = (char *)malloc(n);
/* NOTE(review): no NULL check on the malloc result is visible before the
 * sprintf calls write through p - confirm.
 * NOTE(review): hostport and path are passed into the HTML template as
 * they are, whereas trust_url() runs url_encode() on its arguments -
 * confirm that unencoded output is intended here.
 * The first call passes three hostport/path pairs, the second passes two;
 * the #endif below names FORCE_LOAD as the switch between them. */
443 sprintf(p, CBLOCK, http->hostport, http->path, http->hostport, http->path,
444 http->hostport, http->path);
446 sprintf(p, CBLOCK, http->hostport, http->path, http->hostport, http->path);
447 #endif /* def FORCE_LOAD */
454 #ifdef IMAGE_BLOCKING
455 /*********************************************************************
457 * Function : block_imageurl
459 * Description : Given a URL which is blocked, decide whether to
460 * send the "blocked" image or HTML.
463 * 1 : http = URL to check.
464 * 2 : csp = Current client state (buffers, headers, etc...)
466 * Returns : True (nonzero) if URL is in image list, false (0)
469 *********************************************************************/
/* Decides between the blocked image and the HTML page for a blocked URL.
 * The parameter http is not referenced on the lines visible here. */
470 int block_imageurl(struct http_request *http, struct client_state *csp)
472 #ifdef DETECT_MSIE_IMAGES
/* Case 1: of the three MSIE bits, IS_MSIE and MSIE_IMAGE are set and
 * MSIE_HTML is clear. */
473 if ((csp->accept_types
474 & (ACCEPT_TYPE_IS_MSIE|ACCEPT_TYPE_MSIE_IMAGE|ACCEPT_TYPE_MSIE_HTML))
475 == (ACCEPT_TYPE_IS_MSIE|ACCEPT_TYPE_MSIE_IMAGE))
/* Case 2: IS_MSIE and MSIE_HTML are set and MSIE_IMAGE is clear. */
479 else if ((csp->accept_types
480 & (ACCEPT_TYPE_IS_MSIE|ACCEPT_TYPE_MSIE_IMAGE|ACCEPT_TYPE_MSIE_HTML))
481 == (ACCEPT_TYPE_IS_MSIE|ACCEPT_TYPE_MSIE_HTML))
/* This return yields 1 when the current action has ACTION_IMAGE set and
 * 0 otherwise. */
487 return ((csp->action->flags & ACTION_IMAGE) != 0);
489 #endif /* def IMAGE_BLOCKING */
493 /*********************************************************************
495 * Function : re_process_buffer
497 * Description : Apply all jobs from the joblist (aka. Perl regexp's) to
498 * the text buffer that's been accumulated in csp->iob->buf
499 * and set csp->content_length to the modified size.
502 * 1 : csp = Current client state (buffers, headers, etc...)
504 * Returns : a pointer to the (newly allocated) modified buffer.
507 *********************************************************************/
/* Runs every job of the regexp filter job list over the buffered data and
 * records the resulting size in csp->content_length. */
508 char *re_process_buffer(struct client_state *csp)
/* size starts as the byte count between iob->cur and iob->eod;
 * old starts at the unmodified buffer. */
511 int size = csp->iob->eod - csp->iob->cur;
512 char *old=csp->iob->cur, *new = NULL;
513 pcrs_job *job, *joblist;
515 struct file_list *fl;
516 struct re_filterfile_spec *b;
518 /* Sanity first ;-) */
524 if ( ( NULL == (fl = csp->rlist) ) || ( NULL == (b = fl->f) ) )
526 log_error(LOG_LEVEL_ERROR, "Unable to get current state of regexp filtering.");
530 joblist = b->joblist;
533 log_error(LOG_LEVEL_RE_FILTER, "re_filtering %s%s (size %d) ...",
534 csp->http->hostport, csp->http->path, size);
536 /* Apply all jobs from the joblist */
537 for (job = joblist; NULL != job; job = job->next)
/* Each job reads old and size, stores its output pointer in new and
 * overwrites size with the new length; return values add up in hits. */
539 hits += pcrs_exec_substitution(job, old, size, &new, &size);
/* Intermediate buffers are freed; the original iob buffer never is. */
540 if (old != csp->iob->cur) free(old);
544 log_error(LOG_LEVEL_RE_FILTER, " produced %d hits (new size %d).", hits, size);
/* The final size is stored in csp->content_length. */
546 csp->content_length = size;
548 /* fwiw, reset the iob */
553 #endif /* def PCRS */
557 /*********************************************************************
559 * Function : trust_url
561 * Description : Should we "trust" this URL? See "trustfile" line in config.
564 * 1 : http = http_request request for requested URL
565 * 2 : csp = Current client state (buffers, headers, etc...)
567 * Returns : NULL => trusted, else string to HTML "untrusted" description.
569 *********************************************************************/
/* Checks the requested URL against the trust list, then falls back to
 * checking the referrer against csp->config->trust_list.
 * NOTE(review): the code that builds the CTRUST page (url_encode of
 * hostport, path and referrer, malloc, sprintf) appears twice in this
 * function; no NULL check on either malloc result is visible. */
570 char *trust_url(struct http_request *http, struct client_state *csp)
572 struct file_list *fl;
573 struct block_spec *b;
574 struct url_spec url[1], **tl, *t;
576 char *hostport, *path, *refer;
577 struct http_request rhttp[1];
580 if (((fl = csp->tlist) == NULL) || ((b = fl->f) == NULL))
585 *url = dsplit(http->host);
587 /* if splitting the domain fails, punt */
588 if (url->dbuf == NULL) return(NULL);
590 memset(rhttp, '\0', sizeof(*rhttp));
/* First pass: find a trust list entry matching port, domain and path.
 * Port 0, an empty domain and a NULL path each act as a wildcard.
 * The regexec and strncmp lines are alternatives (presumably selected
 * by a preprocessor switch that is not visible here). */
592 for (b = b->next; b ; b = b->next)
594 if ((b->url->port == 0) || (b->url->port == http->port))
596 if ((b->url->domain[0] == '\0') || (domaincmp(b->url, url) == 0))
598 if ((b->url->path == NULL) ||
600 (regexec(b->url->preg, http->path, 0, NULL, 0) == 0)
602 (strncmp(b->url->path, http->path, b->url->pathlen) == 0)
/* An entry with reject == 0 yields NULL, the trusted result. */
609 if (b->reject == 0) return(NULL);
/* The page arguments are URL-encoded; the referrer is replaced by the
 * literal text undefined on one of the two branches below. */
611 hostport = url_encode(http->hostport);
612 path = url_encode(http->path);
616 refer = url_encode(csp->referrer);
620 refer = url_encode("undefined");
624 n += strlen(hostport);
628 p = (char *)malloc(n);
630 sprintf(p, CTRUST, hostport, path, refer);
/* A referrer of nine characters or fewer is treated as missing; the
 * first nine characters are skipped further down as the header name. */
645 if ((csp->referrer == NULL)|| (strlen(csp->referrer) <= 9))
647 /* no referrer was supplied */
648 goto trust_url_not_trusted;
651 /* forge a URL from the referrer so we can use
652 * convert_url() to parse it into its components.
656 p = strsav(p, "GET ");
657 p = strsav(p, csp->referrer + 9); /* skip over "Referer: " */
658 p = strsav(p, " HTTP/1.0");
660 parse_http_request(p, rhttp, csp);
/* A NULL cmd after parsing is handled as an untrusted request. */
662 if (rhttp->cmd == NULL)
665 goto trust_url_not_trusted;
670 *url = dsplit(rhttp->host);
672 /* if splitting the domain fails, punt */
673 if (url->dbuf == NULL) goto trust_url_not_trusted;
/* Second pass: match the referrer host, port and path against the
 * NULL-terminated csp->config->trust_list. */
675 for (tl = csp->config->trust_list; (t = *tl) ; tl++)
677 if ((t->port == 0) || (t->port == rhttp->port))
679 if ((t->domain[0] == '\0') || domaincmp(t, url) == 0)
681 if ((t->path == NULL) ||
683 (regexec(t->preg, rhttp->path, 0, NULL, 0) == 0)
685 (strncmp(t->path, rhttp->path, t->pathlen) == 0)
689 /* if the URL's referrer is from a trusted referrer, then
690 * add the target spec to the trustfile as an unblocked
691 * domain and return NULL (which means it's OK).
699 if ((fp = fopen(csp->config->trustfile, "a")))
704 h = strsav(h, http->hostport);
710 /* since this path points into a user's home space
711 * be sure to include this spec in the trustfile.
713 if ((p = strchr(p, '/')))
716 h = strsav(h, http->path);
721 free_http_request(rhttp);
723 fprintf(fp, "%s\n", h);
733 trust_url_not_trusted:
734 free_http_request(rhttp);
736 hostport = url_encode(http->hostport);
737 path = url_encode(http->path);
741 refer = url_encode(csp->referrer);
745 refer = url_encode("undefined");
749 n += strlen(hostport);
753 p = (char *)malloc(n);
754 sprintf(p, CTRUST, hostport, path, refer);
763 #endif /* def TRUST_FILES */
/* C_HOME_PAGE is the fixed response text that intercept_url() writes to
 * the client socket after its interceptor loop.
 * NOTE(review): the proxy configuration link below targets
 * show-proxy-arg, while the pages produced by show_proxy_args() link to
 * show-proxy-args - confirm which spelling is intended. */
766 static const char C_HOME_PAGE[] =
769 "Expires: Thu Jul 31, 1997 07:42:22 pm GMT\n"
770 "Content-Type: text/html\n\n"
773 "<title>Internet Junkbuster: Information</title>\n"
779 "<p><a href=\"" HOME_PAGE_URL "\">JunkBuster web site</a></p>\n"
780 "<p><a href=\"http://i.j.b/show-proxy-arg\">Proxy configuration</a></p>\n"
781 "<p><a href=\"http://i.j.b/show-url-info\">Look up a URL</a></p>\n"
786 /*********************************************************************
788 * Function : intercept_url
790 * Description : checks the URL `basename' against a list of URLs to
791 * snarf. If it matches, it calls the associated function
792 * which returns an HTML page to send back to the client.
793 * Right now, we snarf:
794 * "show-proxy-args", and
795 * "ij-untrusted-url" (optional w/TRUST_FILES)
798 * 1 : http = http_request request, check `basename's of blocklist
799 * 2 : csp = Current client state (buffers, headers, etc...)
801 * Returns : 1 if it intercepts & handles the request.
803 *********************************************************************/
/* Recognises requests addressed to the proxy itself, dispatches them to
 * the matching entry of intercept_patterns and writes the result to the
 * client socket. */
804 int intercept_url(struct http_request *http, struct client_state *csp)
806 char *basename = NULL;
807 const struct interceptors *v;
/* Host i.j.b: the whole request path is the candidate name. */
809 if (0 == strcmpic(http->host,"i.j.b"))
812 * Catch http://i.j.b/...
814 basename = http->path;
/* The two project hosts qualify only when the path is exactly /config
 * or continues with a slash right after it; the candidate name is the
 * part of the path after those seven characters. */
816 else if ( ( (0 == strcmpic(http->host,"ijbswa.sourceforge.net"))
817 || (0 == strcmpic(http->host,"ijbswa.sf.net")) )
818 && (0 == strncmpic(http->path,"/config", 7))
819 && ((http->path[7] == '/') || (http->path[7] == '\0')))
822 * Catch http://ijbswa.sourceforge.net/config/...
823 * and http://ijbswa.sf.net/config/...
825 basename = http->path + 7;
830 /* Don't want to intercept */
834 /* We have intercepted it. */
836 /* remove any leading slash */
837 if (*basename == '/')
842 log_error(LOG_LEVEL_GPC, "%s%s intercepted!", http->hostport, http->path);
/* NOTE(review): the common-log line always reports status 200 and a
 * size of 3, whatever is actually sent - confirm this is acceptable. */
843 log_error(LOG_LEVEL_CLF, "%s - - [%T] \"%s\" 200 3",
844 csp->ip_addr_str, http->cmd);
/* The table ends at the first entry whose str is NULL; an entry matches
 * when its first v->len characters equal the start of basename. */
846 for (v = intercept_patterns; v->str; v++)
848 if (strncmp(basename, v->str, v->len) == 0)
850 char * p = ((v->interceptor)(http, csp));
/* NOTE(review): ijb_send_banner() is documented as returning NULL after
 * sending its own data; the guard that keeps a NULL p away from strlen
 * is not visible on these lines - confirm it exists. */
854 /* Send HTML redirection result */
855 write_socket(csp->cfd, p, strlen(p));
/* Sends the fixed home page (presumably when no entry matched - TODO
 * confirm, the intervening lines are not visible). */
863 write_socket(csp->cfd, C_HOME_PAGE, strlen(C_HOME_PAGE));
868 #ifdef FAST_REDIRECTS
869 /*********************************************************************
871 * Function : redirect_url
873 * Description : Checks for redirection URLs and returns a HTTP redirect
874 * to the destination URL.
877 * 1 : http = http_request request, check `basename's of blocklist
878 * 2 : csp = Current client state (buffers, headers, etc...)
880 * Returns : NULL if URL was clean, HTTP redirect otherwise.
882 *********************************************************************/
/* Looks for a URL embedded in the request path and, when one is found,
 * formats a redirect to it from HTTP_REDIRECT_TEMPLATE.
 * The path is read through csp->http; the http parameter is not
 * referenced on the lines visible here. */
883 char *redirect_url(struct http_request *http, struct client_state *csp)
887 p = q = csp->http->path;
888 log_error(LOG_LEVEL_REDIRECTS, "checking path: %s", p);
890 /* find the last URL encoded in the request */
/* The assignment inside the condition is deliberate: the loop ends when
 * strstr() finds no further match and returns NULL. */
891 while (p = strstr(p, "http://"))
896 /* if there was any, generate and return a HTTP redirect */
/* q differing from the start of the path means a match was recorded. */
897 if (q != csp->http->path)
899 log_error(LOG_LEVEL_REDIRECTS, "redirecting to: %s", q);
/* NOTE(review): the size has no explicit byte for the terminator; it is
 * sufficient only if the template contains a two-character %s that q
 * replaces - confirm. No NULL check on the malloc result is visible. */
901 p = (char *)malloc(strlen(HTTP_REDIRECT_TEMPLATE) + strlen(q));
902 sprintf(p, HTTP_REDIRECT_TEMPLATE, q);
911 #endif /* def FAST_REDIRECTS */
913 /*********************************************************************
915 * Function : url_actions
917 * Description : Gets the actions for this URL.
920 * 1 : http = http_request request for blocked URLs
921 * 2 : csp = Current client state (buffers, headers, etc...)
925 *********************************************************************/
/* Resets csp->action and then applies the loaded actions list to it for
 * the given request. */
926 void url_actions(struct http_request *http,
927 struct client_state *csp)
929 struct file_list *fl;
930 struct url_actions *b;
/* The current action is initialised before the list is even looked up. */
932 init_current_action(csp->action);
/* Tests for a missing actions list or a list without loaded data. */
934 if (((fl = csp->actions_list) == NULL) || ((b = fl->f) == NULL))
939 apply_url_actions(csp->action, http, b);
943 /*********************************************************************
945 * Function : apply_url_actions
947 * Description : Applies a list of URL actions.
950 * 1 : action = Destination.
951 * 2 : http = Current URL
952 * 3 : b = list of URL actions to apply
956 *********************************************************************/
/* Merges into action the actions of the list entries whose URL pattern
 * matches the request. */
957 void apply_url_actions(struct current_action_spec *action,
958 struct http_request *http,
959 struct url_actions *b)
961 struct url_spec url[1];
965 /* Should never happen */
/* The request host is split into domain components for domaincmp(). */
969 *url = dsplit(http->host);
971 /* if splitting the domain fails, punt */
972 if (url->dbuf == NULL)
/* Iteration starts at b->next, so the node passed in is not examined.
 * Port 0, an empty domain and a NULL path each act as a wildcard.
 * The regexec and strncmp lines are alternatives (presumably selected
 * by a preprocessor switch that is not visible here). */
977 for (b = b->next; NULL != b; b = b->next)
979 if ((b->url->port == 0) || (b->url->port == http->port))
981 if ((b->url->domain[0] == '\0') || (domaincmp(b->url, url) == 0))
983 if ((b->url->path == NULL) ||
985 (regexec(b->url->preg, http->path, 0, NULL, 0) == 0)
987 (strncmp(b->url->path, http->path, b->url->pathlen) == 0)
/* The matching entry is merged into the accumulated action. */
991 merge_current_action(action, b->action);
1002 /*********************************************************************
1004 * Function : forward_url
1006 * Description : Should we forward this to another proxy?
1009 * 1 : http = http_request request for current URL
1010 * 2 : csp = Current client state (buffers, headers, etc...)
1012 * Returns : Return gw_default for no forward match,
1013 * else a gateway pointer to a specific forwarding proxy.
1015 *********************************************************************/
/* Searches the forwarding list for an entry whose URL pattern matches
 * the request. */
1016 const struct gateway *forward_url(struct http_request *http, struct client_state *csp)
1018 struct file_list *fl;
1019 struct forward_spec *b;
1020 struct url_spec url[1];
/* Tests for a missing forward list or a list without loaded data. */
1022 if (((fl = csp->flist) == NULL) || ((b = fl->f) == NULL))
1027 *url = dsplit(http->host);
1029 /* if splitting the domain fails, punt */
1030 if (url->dbuf == NULL) return(gw_default);
/* Iteration starts at b->next, so the first node is not examined.
 * Port 0, an empty domain and a NULL path each act as a wildcard.
 * The regexec and strncmp lines are alternatives (presumably selected
 * by a preprocessor switch that is not visible here). */
1032 for (b = b->next; b ; b = b->next)
1034 if ((b->url->port == 0) || (b->url->port == http->port))
1036 if ((b->url->domain[0] == '\0') || (domaincmp(b->url, url) == 0))
1038 if ((b->url->path == NULL) ||
1040 (regexec(b->url->preg, http->path, 0, NULL, 0) == 0)
1042 (strncmp(b->url->path, http->path, b->url->pathlen) == 0)
1061 /*********************************************************************
1065 * Description : Takes a domain and returns (by value) a url_spec
1066 * structure populated with dbuf, dcnt and dvec. The
1067 * other fields in the structure that is returned are zero.
1070 * 1 : domain = a URL address
1072 * Returns : url_spec structure populated with dbuf, dcnt and dvec.
1074 *********************************************************************/
/* Splits a domain name at the dots and returns, by value, a url_spec
 * holding a lower-cased copy (dbuf), the component count (dcnt) and a
 * copied vector of component pointers (dvec). */
1075 struct url_spec dsplit(char *domain)
1077 struct url_spec ret[1];
/* ret is an array of one element, so sizeof(*ret) here and sizeof(ret)
 * further down denote the same number of bytes. */
1082 memset(ret, '\0', sizeof(*ret));
/* A trailing dot marks the pattern as unanchored.
 * NOTE(review): for an empty domain, strlen(domain) - 1 wraps around and
 * the index is out of bounds; no guard is visible here - confirm. */
1084 ret->unanchored = (domain[strlen(domain) - 1] == '.');
/* NOTE(review): the strdup result is walked by the loop below with no
 * NULL check visible in between - confirm. */
1086 ret->dbuf = strdup(domain);
1088 /* map to lower case */
/* NOTE(review): tolower receives a plain char; this file wraps isdigit
 * in ijb_isdigit with an unsigned char cast for high-bit characters,
 * and the same concern applies here. */
1089 for (p = ret->dbuf; *p ; p++) *p = tolower(*p);
1091 /* split the domain name into components */
1092 ret->dcnt = ssplit(ret->dbuf, ".", v, SZ(v), 1, 1);
/* NOTE(review): this clears ret, including ret->dbuf; confirm that the
 * duplicated buffer is released on a line not visible here. */
1096 memset(ret, '\0', sizeof(ret));
1100 /* save a copy of the pointers in dvec */
1101 size = ret->dcnt * sizeof(*ret->dvec);
/* The copy happens only when the allocation succeeds. */
1103 if ((ret->dvec = (char **)malloc(size)))
1105 memcpy(ret->dvec, v, size);
1114 /*********************************************************************
1116 * Function : domaincmp
1118 * Description : Compare domain names.
1119 * domaincmp("a.b.c", "a.b.c") => 0 (MATCH)
1120 * domaincmp("a*.b.c", "a.b.c") => 0 (MATCH)
1121 * domaincmp("a*.b.c", "abc.b.c") => 0 (MATCH)
1122 * domaincmp("a*c.b.c","abbc.b.c") => 0 (MATCH)
1123 * domaincmp("*a.b.c", "dabc.b.c") => 0 (MATCH)
1124 * domaincmp("b.c" , "a.b.c") => 0 (MATCH)
1125 * domaincmp("a.b" , "a.b.c") => 1 (DIFF)
1126 * domaincmp("a.b." , "a.b.c") => 0 (MATCH)
1127 * domaincmp("" , "a.b.c") => 0 (MATCH)
1129 * FIXME: I need a definition!
1132 * 1 : pattern = a domain that may contain a '*' as a wildcard.
1133 * 2 : fqdn = domain name against which the patterns are compared.
1135 * Returns : 0 => domains are equivalent, else no match.
1137 *********************************************************************/
/* Compares the component vectors of a pattern and of a fully qualified
 * domain name; 0 means match. */
1138 int domaincmp(struct url_spec *pattern, struct url_spec *fqdn)
1140 char **pv, **fv; /* vectors */
1141 int pn, fn; /* counters */
1142 char *p, *f; /* chars */
/* Runs while both vectors still have components left. */
1148 while (fn < fqdn->dcnt && pn < pattern->dcnt)
/* trivimatch() compares one pattern component with one fqdn component
 * (presumably returning nonzero on mismatch - TODO confirm). */
1153 if (trivimatch(p, f))
/* Nonzero (no match) when pattern components are left over, or when
 * fqdn components are left over and the pattern is not unanchored. */
1167 return ((pn < pattern->dcnt) || ((fn < fqdn->dcnt) && !pattern->unanchored));
1174 /* intercept functions */
1176 /*********************************************************************
1178 * Function : show_proxy_args
1180 * Description : This "crunch"es "http://any.thing/show-proxy-args" and
1181 * returns a web page describing the current status of IJB.
1184 * 1 : http = ignored
1185 * 2 : csp = Current client state (buffers, headers, etc...)
1187 * Returns : A string that contains the current status of IJB.
1189 *********************************************************************/
/* Builds the proxy status page by appending pieces to s with strsav().
 * With SPLIT_PROXY_ARGS, a query string selects one configuration file
 * whose contents are shown instead; otherwise the page lists the files
 * in use as links of the form show-proxy-args?name.
 * NOTE(review): the file name is passed through html_encode() when a
 * single file is displayed, but the file names in the link list are
 * appended without it - confirm.
 * NOTE(review): the branch without SPLIT_PROXY_ARGS reads csp->clist,
 * while the rest of this function uses csp->actions_list - confirm that
 * clist still exists.
 * NOTE(review): two string literals in the file display section contain
 * unescaped inner double quotes as shown here (presumably an HTML entity
 * was lost) - confirm against the repository copy. */
1190 char *show_proxy_args(struct http_request *http, struct client_state *csp)
1194 #ifdef SPLIT_PROXY_ARGS
1198 const char * filename = NULL;
1199 const char * file_description = NULL;
1200 char * query_string = strrchr(http->path, '?');
1201 char which_file = '\0';
/* which_file is the character right after the last question mark
 * (presumably compared with the first letters of the link names
 * permit, forward, acl, re and trust - TODO confirm). */
1204 if (query_string != NULL)
1206 /* first char past the last '?' (maybe '\0')*/
1207 which_file = query_string[1];
1212 if (csp->actions_list)
1214 filename = csp->actions_list->filename;
1215 file_description = "Actions List";
1221 filename = csp->flist->filename;
1222 file_description = "Forward List";
1230 filename = csp->alist->filename;
1231 file_description = "Access Control List";
1234 #endif /* def ACL_FILES */
1240 filename = csp->rlist->filename;
1241 file_description = "RE Filter List";
1244 #endif /* def PCRS */
1250 filename = csp->tlist->filename;
1251 file_description = "Trust List";
1254 #endif /* def TRUST_FILES */
/* NOTE(review): the FIXME below may be stale, since Pragma,
 * Last-Modified and Expires headers follow it directly. */
1259 /* Display specified file */
1260 /* FIXME: Add HTTP headers so this isn't cached */
1263 "Server: IJ/" VERSION "\n"
1264 "Content-type: text/html\n"
1265 "Pragma: no-cache\n"
1266 "Last-Modified: Thu Jul 31, 1997 07:42:22 pm GMT\n"
1267 "Expires: Thu Jul 31, 1997 07:42:22 pm GMT\n"
1272 "<title>Internet Junkbuster Proxy Status - ");
1273 s = strsav(s, file_description);
1277 "<body bgcolor=\"#f8f8f0\" link=\"#000078\" alink=\"#ff0022\" vlink=\"#787878\">\n"
1279 "<h1>" BANNER "\n");
1280 s = strsav(s, file_description);
1283 "<p><a href=\"show-proxy-args\">Back to proxy status</a></p>\n"
1285 s = strsav(s, file_description);
1288 "Contents of file "<code>");
1289 p = html_encode(filename);
1293 "</code>":<br>\n"
1297 if ((fp = fopen(filename, "r")) == NULL)
1299 s = strsav(s, "</pre><h1>ERROR OPENING FILE!</h1><pre>");
1303 while (fgets(buf, sizeof(buf), fp))
1305 p = html_encode(buf);
1310 s = strsav(s, "<br>");
1319 "<p><a href=\"show-proxy-args\">Back to proxy status</a></p>\n"
1321 "<small><small><p>\n"
1322 "The " BANNER " Proxy - \n"
1323 "<a href=\"" HOME_PAGE_URL "\">" HOME_PAGE_URL "</a>\n"
1325 "</body></html>\n");
1328 #endif /* def SPLIT_PROXY_ARGS */
1330 s = strsav(s, csp->config->proxy_args_header);
1331 s = strsav(s, csp->config->proxy_args_invocation);
1334 #endif /* def STATISTICS */
1335 s = strsav(s, csp->config->proxy_args_gateways);
1337 #ifdef SPLIT_PROXY_ARGS
1339 "<h2>The following files are in use:</h2>\n"
1340 "<p>(Click a filename to view it)</p>\n"
1343 if (csp->actions_list)
1345 s = strsav(s, "<li>Actions List: <a href=\"show-proxy-args?permit\"><code>");
1346 s = strsav(s, csp->actions_list->filename);
1347 s = strsav(s, "</code></a></li>\n");
1352 s = strsav(s, "<li>Forward List: <a href=\"show-proxy-args?forward\"><code>");
1353 s = strsav(s, csp->flist->filename);
1354 s = strsav(s, "</code></a></li>\n");
1360 s = strsav(s, "<li>Access Control List: <a href=\"show-proxy-args?acl\"><code>");
1361 s = strsav(s, csp->alist->filename);
1362 s = strsav(s, "</code></a></li>\n");
1364 #endif /* def ACL_FILES */
1369 s = strsav(s, "<li>RE Filter List: <a href=\"show-proxy-args?re\"><code>");
1370 s = strsav(s, csp->rlist->filename);
1371 s = strsav(s, "</code></a></li>\n");
1373 #endif /* def PCRS */
1378 s = strsav(s, "<li>Trust List: <a href=\"show-proxy-args?trust\"><code>");
1379 s = strsav(s, csp->tlist->filename);
1380 s = strsav(s, "</code></a></li>\n");
1382 #endif /* def TRUST_FILES */
1384 s = strsav(s, "</ul>");
1386 #else /* ifndef SPLIT_PROXY_ARGS */
1389 s = strsav(s, csp->clist->proxy_args);
1394 s = strsav(s, csp->flist->proxy_args);
1400 s = strsav(s, csp->alist->proxy_args);
1402 #endif /* def ACL_FILES */
1407 s = strsav(s, csp->rlist->proxy_args);
1409 #endif /* def PCRS */
1414 s = strsav(s, csp->tlist->proxy_args);
1416 #endif /* def TRUST_FILES */
1418 #endif /* ndef SPLIT_PROXY_ARGS */
1420 s = strsav(s, csp->config->proxy_args_trailer);
/* C_URL_INFO_HEADER is a printf-style template with two %s placeholders;
 * ijb_show_url_info() passes the looked-up host string for both. */
1427 static const char C_URL_INFO_HEADER[] =
1429 "Pragma: no-cache\n"
1430 "Expires: Thu Jul 31, 1997 07:42:22 pm GMT\n"
1431 "Content-Type: text/html\n\n"
1434 "<title>Internet Junkbuster: URL Info</title>\n"
1440 "<p>Information for: <a href=\"http://%s\">http://%s</a></p>\n";
/* C_URL_INFO_FOOTER is appended by ijb_show_url_info() at the end of
 * each result page; its text is not visible here. */
1441 static const char C_URL_INFO_FOOTER[] =
/* C_URL_INFO_FORM is the fixed lookup-form page; ijb_show_url_info()
 * returns a strdup() copy of it. */
1446 static const char C_URL_INFO_FORM[] =
1448 "Pragma: no-cache\n"
1449 "Expires: Thu Jul 31, 1997 07:42:22 pm GMT\n"
1450 "Content-Type: text/html\n\n"
1453 "<title>Internet Junkbuster: URL Info</title>\n"
1459 "<form method=\"GET\" action=\"http://i.j.b/show-url-info\">\n"
/* NOTE(review): the inner double quotes on the next line are unescaped
 * as shown here, which would end the string literal early (presumably
 * an HTML entity was lost) - confirm against the repository copy. */
1460 "<p>Please enter a URL, without the leading "http://":</p>"
1461 "<p><input type=\"text\" name=\"url\" size=\"80\">"
1462 "<input type=\"submit\" value=\"Info\"></p>\n"
1468 /*********************************************************************
1470 * Function : ijb_show_url_info
1472 * Description : Interceptor that reports which action patterns match
1472 * the URL passed in the "url=" query parameter.
1475 * 1 : http = http_request request for crunched URL
1476 * 2 : csp = Current client state (buffers, headers, etc...)
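1478 * Returns : Pointer to the generated HTML response text (the last
1478 * return hands back a strdup() copy of C_URL_INFO_FORM).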
1480 *********************************************************************/
/* Builds a page that lists, for the URL given in the url= query
 * parameter, the default actions, every matching pattern with its
 * actions, and the merged final result. The last visible statement
 * returns a copy of the lookup form instead.
 * NOTE(review): host comes from the decoded query string and is written
 * into the HTML header template without html_encode(); b->url->spec is
 * appended without it as well - confirm whether encoding is needed.
 * NOTE(review): no NULL checks on the url_decode, strdup or malloc
 * results are visible on these lines - confirm. */
1481 char *ijb_show_url_info(struct http_request *http, struct client_state *csp)
1483 char * query_string = strchr(http->path, '?');
1486 if (query_string != NULL)
/* The text after the first question mark is decoded; when it starts
 * with url= (compared ignoring case) the remainder is copied to host. */
1488 query_string = url_decode(query_string + 1);
1489 if (strncmpic(query_string, "url=", 4) == 0)
1491 host = strdup(query_string + 4);
1493 freez(query_string);
1501 struct file_list *fl;
1502 struct url_actions *b;
1503 struct url_spec url[1];
1504 struct current_action_spec action[1];
1506 init_current_action(action);
/* sizeof counts the template terminator, and each of the two %s
 * placeholders is replaced by host, so the buffer is large enough. */
1508 result = (char *)malloc(sizeof(C_URL_INFO_HEADER) + 2 * strlen(host));
1509 sprintf(result, C_URL_INFO_HEADER, host, host);
1511 s = current_action_to_text(action);
1512 result = strsav(result, "<h3>Defaults:</h3>\n<p><b>{");
1513 result = strsav(result, s);
1514 result = strsav(result, " }</b></p>\n<h3>Patterns affecting the URL:</h3>\n<p>\n");
/* host is searched for a slash and then a colon (presumably to separate
 * the path and port used by the matching loop - TODO confirm). */
1517 s = strchr(host, '/');
1527 s = strchr(host, ':');
1534 if (((fl = csp->actions_list) == NULL) || ((b = fl->f) == NULL))
1538 result = strsav(result, C_URL_INFO_FOOTER);
1542 *url = dsplit(host);
1544 /* if splitting the domain fails, punt */
1545 if (url->dbuf == NULL)
1549 result = strsav(result, C_URL_INFO_FOOTER);
/* Same matching rules as apply_url_actions(): port 0, an empty domain
 * and a NULL path each act as a wildcard. */
1553 for (b = b->next; NULL != b; b = b->next)
1555 if ((b->url->port == 0) || (b->url->port == port))
1557 if ((b->url->domain[0] == '\0') || (domaincmp(b->url, url) == 0))
1559 if ((b->url->path == NULL) ||
1561 (regexec(b->url->preg, path, 0, NULL, 0) == 0)
1563 (strncmp(b->url->path, path, b->url->pathlen) == 0)
/* The entry actions and pattern text are appended to the page, then the
 * entry is merged into the running result. */
1567 s = actions_to_text(b->action);
1568 result = strsav(result, "<b>{");
1569 result = strsav(result, s);
1570 result = strsav(result, " }</b><br>\n<code>");
1571 result = strsav(result, b->url->spec);
1572 result = strsav(result, "</code><br>\n<br>\n");
1575 merge_current_action(action, b->action);
1587 s = current_action_to_text(action);
1588 result = strsav(result, "</p>\n<h2>Final Results:</h2>\n<p><b>{");
1589 result = strsav(result, s);
1590 result = strsav(result, " }</b><br>\n<br>\n");
1593 free_current_action(action);
1595 result = strsav(result, C_URL_INFO_FOOTER);
/* A heap copy of the form page is returned here. */
1600 return strdup(C_URL_INFO_FORM);
1605 /*********************************************************************
1607 * Function : ijb_send_banner
1609 * Description : This "crunch"es "http://i.j.b/ijb-send-banner" and
 *               writes the JBGIF data straight to the client socket
 *               (csp->cfd).
1613 * 1 : http = http_request request for crunched URL
1614 * 2 : csp = Current client state (buffers, headers, etc...)
1616 * Returns : NULL, indicating that it has already sent the data.
1618 *********************************************************************/
1619 char *ijb_send_banner(struct http_request *http, struct client_state *csp)
/* sizeof(JBGIF)-1 sends one byte less than the object's size
 * (presumably to leave out a terminating NUL -- TODO confirm). */
1621 write_socket(csp->cfd, JBGIF, sizeof(JBGIF)-1);
1627 /*********************************************************************
1629 * Function : ij_untrusted_url
1631 * Description : This "crunch"es "http://any.thing/ij-untrusted-url" and
1632 * returns a web page describing why it was untrusted.
 *               The page names the intercepted host and path, the
 *               referrer, the trusted referrers from
 *               csp->config->trust_list and, when present, the
 *               documents listed in csp->config->trust_info.
1635 * 1 : http = http_request request for crunched URL
1636 * 2 : csp = Current client state (buffers, headers, etc...)
1638 * Returns : A string that contains why this was untrusted.
 *            The page is grown with strsav(); the value strsav()
 *            returns, not the pointer passed in, is the page.
1640 *********************************************************************/
1641 char *ij_untrusted_url(struct http_request *http, struct client_state *csp)
1644 char *hostport, *path, *refer, *p, *v[9];
1646 struct url_spec **tl, *t;
1649 static const char format[] =
1650 "HTTP/1.0 200 OK\r\n"
1651 "Pragma: no-cache\n"
1652 "Last-Modified: Thu Jul 31, 1997 07:42:22 pm GMT\n"
1653 "Expires: Thu Jul 31, 1997 07:42:22 pm GMT\n"
1654 "Content-Type: text/html\n\n"
1657 "<title>Internet Junkbuster: Request for untrusted URL</title>\n"
1663 "The " BANNER " Proxy "
1664 "<A href=\"" HOME_PAGE_URL "\">"
1665 "(" HOME_PAGE_URL ") </A>"
1666 "intercepted the request for %s%s\n"
1667 "because the URL is not trusted.\n"
/* The path must split on '?' and '+' into exactly four fields;
 * fields 1..3 carry the host:port, the path and the referrer. */
1670 if ((n = ssplit(http->path, "?+", v, SZ(v), 0, 0)) == 4)
1672 hostport = url_decode(v[1]);
1673 path = url_decode(v[2]);
1674 refer = url_decode(v[3]);
/* Placeholder values for a request that did not split into four fields. */
1678 hostport = strdup("undefined_host");
1679 path = strdup("/undefined_path");
1680 refer = strdup("undefined");
/* n accumulates the size passed to malloc() below. */
1684 n += strlen(hostport);
1687 if ((p = (char *)malloc(n)))
1689 sprintf(p, format, hostport, path);
/*
 * strsav() returns the resulting string, so its result has to be stored
 * back into p -- as every other strsav() call here does -- instead of
 * assuming the old pointer still refers to the page.
 * NOTE(review): any other strsav() call in this sequence must assign
 * its result to p in the same way.
 */
1692 p = strsav(p, "The referrer in this request was <strong>");
1694 p = strsav(p, "</strong><br>\n");
1700 p = strsav(p, "<h3>The following referrers are trusted</h3>\n");
/* trust_list is walked until its NULL entry. */
1702 for (tl = csp->config->trust_list; (t = *tl) ; tl++)
/* NOTE(review): sprintf() into buf is unbounded and the length of
 * t->spec is not checked here -- confirm buf is large enough. */
1704 sprintf(buf, "%s<br>\n", t->spec);
/* The trust_info list is only rendered when something follows its first node. */
1708 if (csp->config->trust_info->next)
1714 "You can learn more about what this means "
1715 "and what you may be able to do about it by "
1716 "reading the following documents:<br>\n"
1722 for (l = csp->config->trust_info->next; l ; l = l->next)
1725 "<li> <a href=%s>%s</a><br>\n",
1730 p = strsav(p, "</ol>\n");
1733 p = strsav(p, "</body>\n" "</html>\n");
1738 #endif /* def TRUST_FILES */
1742 /*********************************************************************
1744 * Function : add_stats
1746 * Description : Statistics function of JB. Called by `show_proxy_args'.
 *               Appends a "Statistics" section to the page: either
 *               "No activity so far!" or the request counts together
 *               with the percentage rejected.
1749 * 1 : s = string that holds the proxy args description page
1751 * Returns : A pointer to the descriptive status web page.
 *            The page pointer is reassigned from strsav() on every
 *            append, so callers should presumably use the returned
 *            pointer rather than the `s' they passed in.
1753 *********************************************************************/
1754 char *add_stats(char *s)
1757 * Output details of the number of requests rejected and
1758 * accepted. This is switchable in the junkbuster config.
1759 * Does nothing if this option is not enabled.
1762 float perc_rej; /* Percentage of http requests rejected */
/* Read each counter once so that the zero test and the division below
 * work from the same values. */
1764 int local_urls_read = urls_read;
1765 int local_urls_rejected = urls_rejected;
1768 * Need to alter the stats not to include the fetch of this
1771 * Can't do following thread safely! doh!
1774 * urls_rejected--; * This will be incremented subsequently *
1777 s = strsav(s,"<h2>Statistics for this " BANNER ":</h2>\n");
/* With nothing read yet there is no percentage to compute (and the
 * division below would be by zero). */
1779 if (local_urls_read == 0)
1782 s = strsav(s,"No activity so far!\n");
1788 perc_rej = (float)local_urls_rejected * 100.0F /
1789 (float)local_urls_read;
1792 "%d requests received, %d filtered "
1795 local_urls_rejected, perc_rej);
1797 s = strsav(s,out_str);
1802 #endif /* def STATISTICS */