1 const char filters_rcs[] = "$Id: filters.c,v 1.11 2001/05/29 11:53:23 oes Exp $";
2 /*********************************************************************
4 * File : $Source: /cvsroot/ijbswa/current/filters.c,v $
6 * Purpose : Declares functions to parse/crunch headers and pages.
7 * Functions declared include:
8 * `acl_addr', `add_stats', `block_acl', `block_imageurl',
9 * `block_url', `url_permissions', `domaincmp', `dsplit',
10 * `filter_popups', `forward_url', 'redirect_url',
11 * `ij_untrusted_url', `intercept_url', `re_process_buffer',
12 * `show_proxy_args', 'ijb_send_banner', and `trust_url'
14 * Copyright : Written by and Copyright (C) 2001 the SourceForge
15 * IJBSWA team. http://ijbswa.sourceforge.net
17 * Based on the Internet Junkbuster originally written
18 * by and Copyright (C) 1997 Anonymous Coders and
19 * Junkbusters Corporation. http://www.junkbusters.com
21 * This program is free software; you can redistribute it
22 * and/or modify it under the terms of the GNU General
23 * Public License as published by the Free Software
24 * Foundation; either version 2 of the License, or (at
25 * your option) any later version.
27 * This program is distributed in the hope that it will
28 * be useful, but WITHOUT ANY WARRANTY; without even the
29 * implied warranty of MERCHANTABILITY or FITNESS FOR A
30 * PARTICULAR PURPOSE. See the GNU General Public
31 * License for more details.
33 * The GNU General Public License should be included with
34 * this file. If not, you can view it at
35 * http://www.gnu.org/copyleft/gpl.html
36 * or write to the Free Software Foundation, Inc., 59
37 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
41 * Revision 1.11 2001/05/29 11:53:23 oes
42 * "See why" link added to "blocked" page
44 * Revision 1.10 2001/05/29 09:50:24 jongfoster
45 * Unified blocklist/imagelist/permissionslist.
46 * File format is still under discussion, but the internal changes
49 * Also modified interceptor behaviour:
50 * - We now intercept all URLs beginning with one of the following
51 * prefixes (and *only* these prefixes):
53 * * http://ijbswa.sf.net/config/
54 * * http://ijbswa.sourceforge.net/config/
55 * - New interceptors "home page" - go to http://i.j.b/ to see it.
56 * - Internal changes so that intercepted and fast redirect pages
57 * are not replaced with an image.
58 * - Interceptors now have the option to send a binary page direct
59 * to the client. (i.e. ijb-send-banner uses this)
60 * - Implemented show-url-info interceptor. (Which is why I needed
61 * the above interceptors changes - a typical URL is
62 * "http://i.j.b/show-url-info?url=www.somesite.com/banner.gif".
63 * The previous mechanism would not have intercepted that, and
64 * if it had been intercepted then it would have replaced
67 * Revision 1.9 2001/05/27 22:17:04 oes
69 * - re_process_buffer no longer writes the modified buffer
70 * to the client, which was very ugly. It now returns the
71 * buffer, which is then written by chat.
73 * - content_length now adjusts the Content-Length: header
74 * for modified documents rather than crunch()ing it.
75 * (Length info in csp->content_length, which is 0 for
76 * unmodified documents)
78 * - For this to work, sed() is called twice when filtering.
80 * Revision 1.8 2001/05/26 17:13:28 jongfoster
81 * Filled in a function comment.
83 * Revision 1.7 2001/05/26 15:26:15 jongfoster
84 * ACL feature now provides more security by immediately dropping
85 * connections from untrusted hosts.
87 * Revision 1.6 2001/05/26 00:28:36 jongfoster
88 * Automatic reloading of config file.
89 * Removed obsolete SIGHUP support (Unix) and Reload menu option (Win32).
90 * Most of the global variables have been moved to a new
91 * struct configuration_spec, accessed through csp->config->globalname
92 * Most of the globals remaining are used by the Win32 GUI.
94 * Revision 1.5 2001/05/25 22:34:30 jongfoster
97 * Revision 1.4 2001/05/22 18:46:04 oes
99 * - Enabled filtering banners by size rather than URL
100 * by adding patterns that replace all standard banner
101 * sizes with the "Junkbuster" gif to the re_filterfile
103 * - Enabled filtering WebBugs by providing a pattern
104 * which kills all 1x1 images
106 * - Added support for PCRE_UNGREEDY behaviour to pcrs,
107 * which is selected by the (nonstandard and therefore
108 * capital) letter 'U' in the option string.
109 * It causes the quantifiers to be ungreedy by default.
110 * Appending a ? turns back to greedy (!).
112 * - Added a new interceptor ijb-send-banner, which
113 * sends back the "Junkbuster" gif. Without imagelist or
114 * MSIE detection support, or if tinygif = 1, or the
115 * URL isn't recognized as an imageurl, a lame HTML
116 * explanation is sent instead.
118 * - Added new feature, which permits blocking remote
119 * script redirects and firing back a local redirect
121 * The feature is conditionally compiled, i.e. it
122 * can be disabled with --disable-fast-redirects,
123 * plus it must be activated by a "fast-redirects"
124 * line in the config file, has its own log level
125 * and of course wants to be displayed by show-proxy-args
126 * Note: Boy, all the #ifdefs in 1001 locations and
127 * all the fumbling with configure.in and acconfig.h
128 * were *way* more work than the feature itself :-(
130 * - Because a generic redirect template was needed for
131 * this, tinygif = 3 now uses the same.
133 * - Moved GIFs, and other static HTTP response templates
138 * - Removed some >400 CRs again (Jon, you really worked
141 * Revision 1.3 2001/05/20 16:44:47 jongfoster
142 * Removing last hardcoded JunkBusters.com URLs.
144 * Revision 1.2 2001/05/20 01:21:20 jongfoster
145 * Version 2.9.4 checkin.
146 * - Merged popupfile and cookiefile, and added control over PCRS
147 * filtering, in new "permissionsfile".
148 * - Implemented LOG_LEVEL_FATAL, so that if there is a configuration
149 * file error you now get a message box (in the Win32 GUI) rather
150 * than the program exiting with no explanation.
151 * - Made killpopup use the PCRS MIME-type checking and HTTP-header
153 * - Removed tabs from "config"
154 * - Moved duplicated url parsing code in "loaders.c" to a new function.
155 * - Bumped up version number.
157 * Revision 1.1.1.1 2001/05/15 13:58:52 oes
158 * Initial import of version 2.9.3 source tree
161 *********************************************************************/
167 #include <sys/types.h>
174 #include <netinet/in.h>
176 #include <winsock2.h>
183 #include "showargs.h"
187 #include "jbsockets.h"
189 #include "jbsockets.h"
190 #include "miscutil.h"
196 const char filters_h_rcs[] = FILTERS_H_VERSION;
198 /* Fix a problem with Solaris. There should be no effect on other
200 * Solaris's isspace() is a macro which uses its argument directly
201 * as an array index. Therefore we need to make sure that high-bit
202 * characters generate +ve values, and ideally we also want to make
203 * the argument match the declared parameter type of "int".
205 #define ijb_isdigit(__X) isdigit((int)(unsigned char)(__X))
208 static const char CBLOCK[] =
210 "HTTP/1.0 403 Request for blocked URL\n"
211 #else /* ifndef AMIGA */
212 "HTTP/1.0 202 Request for blocked URL\n"
213 #endif /* ndef AMIGA */
215 "Last-Modified: Thu Jul 31, 1997 07:42:22 pm GMT\n"
216 "Expires: Thu Jul 31, 1997 07:42:22 pm GMT\n"
217 "Content-Type: text/html\n\n"
220 "<title>Internet Junkbuster: Request for blocked URL</title>\n"
226 "<p align=center>Your request for <b>%s%s</b>\n"
227 "was blocked.<br><a href=\"http://i.j.b/show-url-info?url=%s%s\">See why</a>"
229 " or <a href=\"http://%s" FORCE_PREFIX "%s\">"
230 "go there anyway.</a>"
231 #endif /* def FORCE_LOAD */
237 static const char CTRUST[] =
239 "HTTP/1.0 403 Request for untrusted URL\n"
240 #else /* ifndef AMIGA */
241 "HTTP/1.0 202 Request for untrusted URL\n"
242 #endif /* ndef AMIGA */
244 "Last-Modified: Thu Jul 31, 1997 07:42:22 pm GMT\n"
245 "Expires: Thu Jul 31, 1997 07:42:22 pm GMT\n"
246 "Content-Type: text/html\n\n"
249 "<title>Internet Junkbuster: Request for untrusted URL</title>\n"
253 "<a href=http://i.j.b/ij-untrusted-url?%s+%s+%s>"
259 #endif /* def TRUST_FILES */
263 /*********************************************************************
265 * Function : block_acl
267 * Description : Block this request?
268 * Decide yes or no based on ACL file.
271 * 1 : dst = The proxy or gateway address this is going to.
272 * Or NULL to check all possible targets.
273 * 2 : csp = Current client state (buffers, headers, etc...)
274 * Also includes the client IP address.
276 * Returns : 0 = FALSE (don't block) and 1 = TRUE (do block)
278 *********************************************************************/
279 int block_acl(struct access_control_addr *dst,
280 struct client_state *csp)
282 struct file_list *fl;
283 struct access_control_list *a, *acl;
285 /* if not using an access control list, then permit the connection */
286 if (((fl = csp->alist) == NULL) ||
287 ((acl = (struct access_control_list *) fl->f) == NULL))
292 /* search the list */
293 for (a = acl->next ; a ; a = a->next)
295 if ((csp->ip_addr_long & a->src->mask) == a->src->addr)
299 /* Just want to check if they have any access */
300 if (a->action == ACL_PERMIT)
305 else if ( ((dst->addr & a->dst->mask) == a->dst->addr)
306 && ((dst->port == a->dst->port) || (a->dst->port == 0)))
308 if (a->action == ACL_PERMIT)
325 /*********************************************************************
327 * Function : acl_addr
329 * Description : Called from `load_aclfile' to parse an ACL address.
332 * 1 : aspec = String specifying ACL address.
333 * 2 : aca = struct access_control_addr to fill in.
335 * Returns : 0 => Ok, everything else is an error.
337 *********************************************************************/
338 int acl_addr(char *aspec, struct access_control_addr *aca)
340 int i, masklength, port;
346 if ((p = strchr(aspec, '/')))
350 if (ijb_isdigit(*p) == 0)
354 masklength = atoi(p);
357 if ((masklength < 0) || (masklength > 32))
362 if ((p = strchr(aspec, ':')))
366 if (ijb_isdigit(*p) == 0)
375 aca->addr = ntohl(resolve_hostname_to_ip(aspec));
379 log_error(LOG_LEVEL_ERROR, "can't resolve address for %s", aspec);
383 /* build the netmask */
385 for (i=1; i <= masklength ; i++)
387 aca->mask |= (1 << (32 - i));
390 /* now mask off the host portion of the ip address
391 * (i.e. save only the network portion of the address).
393 aca->addr = aca->addr & aca->mask;
398 #endif /* def ACL_FILES */
401 /*********************************************************************
403 * Function : block_url
405 * Description : Called from `chat'. Check to see if we need to block this.
408 * 1 : http = http_request request to "check" for blocked
409 * 2 : csp = Current client state (buffers, headers, etc...)
411 * Returns : NULL => unblocked, else string to HTML block description.
413 *********************************************************************/
414 char *block_url(struct http_request *http, struct client_state *csp)
420 if ((csp->permissions & PERMIT_BLOCK) == 0)
428 #endif /* def FORCE_LOAD */
431 n += factor * strlen(http->hostport);
432 n += factor * strlen(http->path);
434 p = (char *)malloc(n);
437 sprintf(p, CBLOCK, http->hostport, http->path, http->hostport, http->path,
438 http->hostport, http->path);
440 sprintf(p, CBLOCK, http->hostport, http->path, http->hostport, http->path);
441 #endif /* def FORCE_LOAD */
448 #ifdef IMAGE_BLOCKING
449 /*********************************************************************
451 * Function : block_imageurl
453 * Description : Given a URL which is blocked, decide whether to
454 * send the "blocked" image or HTML.
457 * 1 : http = URL to check.
458 * 2 : csp = Current client state (buffers, headers, etc...)
460 * Returns : True (nonzero) if URL is in image list, false (0)
463 *********************************************************************/
464 int block_imageurl(struct http_request *http, struct client_state *csp)
466 #ifdef DETECT_MSIE_IMAGES
467 if ((csp->accept_types
468 & (ACCEPT_TYPE_IS_MSIE|ACCEPT_TYPE_MSIE_IMAGE|ACCEPT_TYPE_MSIE_HTML))
469 == (ACCEPT_TYPE_IS_MSIE|ACCEPT_TYPE_MSIE_IMAGE))
473 else if ((csp->accept_types
474 & (ACCEPT_TYPE_IS_MSIE|ACCEPT_TYPE_MSIE_IMAGE|ACCEPT_TYPE_MSIE_HTML))
475 == (ACCEPT_TYPE_IS_MSIE|ACCEPT_TYPE_MSIE_HTML))
481 return ((csp->permissions & PERMIT_IMAGE) != 0);
483 #endif /* def IMAGE_BLOCKING */
487 /*********************************************************************
489 * Function : re_process_buffer
491 * Description : Apply all jobs from the joblist (aka. Perl regexp's) to
492 * the text buffer that's been accumulated in csp->iob->buf
493 * and set csp->content_length to the modified size.
496 * 1 : csp = Current client state (buffers, headers, etc...)
498 * Returns : a pointer to the (newly allocated) modified buffer.
501 *********************************************************************/
502 char *re_process_buffer(struct client_state *csp)
505 int size = csp->iob->eod - csp->iob->cur;
506 char *old=csp->iob->cur, *new = NULL;
507 pcrs_job *job, *joblist;
509 struct file_list *fl;
510 struct re_filterfile_spec *b;
512 /* Sanity first ;-) */
518 if ( ( NULL == (fl = csp->rlist) ) || ( NULL == (b = fl->f) ) )
520 log_error(LOG_LEVEL_ERROR, "Unable to get current state of regexp filtering.");
524 joblist = b->joblist;
527 log_error(LOG_LEVEL_RE_FILTER, "re_filtering %s%s (size %d) ...",
528 csp->http->hostport, csp->http->path, size);
530 /* Apply all jobs from the joblist */
531 for (job = joblist; NULL != job; job = job->next)
533 hits += pcrs_exec_substitution(job, old, size, &new, &size);
534 if (old != csp->iob->cur) free(old);
538 log_error(LOG_LEVEL_RE_FILTER, " produced %d hits (new size %d).", hits, size);
540 csp->content_length = size;
542 /* fwiw, reset the iob */
547 #endif /* def PCRS */
551 /*********************************************************************
553 * Function : trust_url
555 * Description : Should we "trust" this URL? See "trustfile" line in config.
558 * 1 : http = http_request request for requested URL
559 * 2 : csp = Current client state (buffers, headers, etc...)
561 * Returns : NULL => trusted, else string to HTML "untrusted" description.
563 *********************************************************************/
564 char *trust_url(struct http_request *http, struct client_state *csp)
566 struct file_list *fl;
567 struct block_spec *b;
568 struct url_spec url[1], **tl, *t;
570 char *hostport, *path, *refer;
571 struct http_request rhttp[1];
574 if (((fl = csp->tlist) == NULL) || ((b = fl->f) == NULL))
579 *url = dsplit(http->host);
581 /* if splitting the domain fails, punt */
582 if (url->dbuf == NULL) return(NULL);
584 memset(rhttp, '\0', sizeof(*rhttp));
586 for (b = b->next; b ; b = b->next)
588 if ((b->url->port == 0) || (b->url->port == http->port))
590 if ((b->url->domain[0] == '\0') || (domaincmp(b->url, url) == 0))
592 if ((b->url->path == NULL) ||
594 (regexec(b->url->preg, http->path, 0, NULL, 0) == 0)
596 (strncmp(b->url->path, http->path, b->url->pathlen) == 0)
603 if (b->reject == 0) return(NULL);
605 hostport = url_encode(http->hostport);
606 path = url_encode(http->path);
610 refer = url_encode(csp->referrer);
614 refer = url_encode("undefined");
618 n += strlen(hostport);
622 p = (char *)malloc(n);
624 sprintf(p, CTRUST, hostport, path, refer);
639 if ((csp->referrer == NULL)|| (strlen(csp->referrer) <= 9))
641 /* no referrer was supplied */
642 goto trust_url_not_trusted;
645 /* forge a URL from the referrer so we can use
646 * convert_url() to parse it into its components.
650 p = strsav(p, "GET ");
651 p = strsav(p, csp->referrer + 9); /* skip over "Referer: " */
652 p = strsav(p, " HTTP/1.0");
654 parse_http_request(p, rhttp, csp);
656 if (rhttp->cmd == NULL)
659 goto trust_url_not_trusted;
664 *url = dsplit(rhttp->host);
666 /* if splitting the domain fails, punt */
667 if (url->dbuf == NULL) goto trust_url_not_trusted;
669 for (tl = csp->config->trust_list; (t = *tl) ; tl++)
671 if ((t->port == 0) || (t->port == rhttp->port))
673 if ((t->domain[0] == '\0') || domaincmp(t, url) == 0)
675 if ((t->path == NULL) ||
677 (regexec(t->preg, rhttp->path, 0, NULL, 0) == 0)
679 (strncmp(t->path, rhttp->path, t->pathlen) == 0)
683 /* if the URL's referrer is from a trusted referrer, then
684 * add the target spec to the trustfile as an unblocked
685 * domain and return NULL (which means it's OK).
693 if ((fp = fopen(csp->config->trustfile, "a")))
698 h = strsav(h, http->hostport);
704 /* since this path points into a user's home space
705 * be sure to include this spec in the trustfile.
707 if ((p = strchr(p, '/')))
710 h = strsav(h, http->path);
715 free_http_request(rhttp);
717 fprintf(fp, "%s\n", h);
727 trust_url_not_trusted:
728 free_http_request(rhttp);
730 hostport = url_encode(http->hostport);
731 path = url_encode(http->path);
735 refer = url_encode(csp->referrer);
739 refer = url_encode("undefined");
743 n += strlen(hostport);
747 p = (char *)malloc(n);
748 sprintf(p, CTRUST, hostport, path, refer);
757 #endif /* def TRUST_FILES */
760 static const char C_HOME_PAGE[] =
763 "Expires: Thu Jul 31, 1997 07:42:22 pm GMT\n"
764 "Content-Type: text/html\n\n"
767 "<title>Internet Junkbuster: Information</title>\n"
773 "<p><a href=\"" HOME_PAGE_URL "\">JunkBuster web site</a></p>\n"
774 "<p><a href=\"http://i.j.b/show-proxy-arg\">Proxy configuration</a></p>\n"
775 "<p><a href=\"http://i.j.b/show-url-info\">Look up a URL</a></p>\n"
780 /*********************************************************************
782 * Function : intercept_url
784 * Description : checks the URL `basename' against a list of URLs to
785 * snarf. If it matches, it calls the associated function
786 * which returns an HTML page to send back to the client.
787 * Right now, we snarf:
788 * "show-proxy-args", and
789 * "ij-untrusted-url" (optional w/TRUST_FILES)
792 * 1 : http = http_request request, check `basename's of blocklist
793 * 2 : csp = Current client state (buffers, headers, etc...)
795 * Returns : 1 if it intercepts & handles the request.
797 *********************************************************************/
798 int intercept_url(struct http_request *http, struct client_state *csp)
800 char *basename = NULL;
801 const struct interceptors *v;
803 if (0 == strcmpic(http->host,"i.j.b"))
806 * Catch http://i.j.b/...
808 basename = http->path;
810 else if ( ( (0 == strcmpic(http->host,"ijbswa.sourceforge.net"))
811 || (0 == strcmpic(http->host,"ijbswa.sf.net")) )
812 && (0 == strncmpic(http->path,"/config", 7))
813 && ((http->path[7] == '/') || (http->path[7] == '\0')))
816 * Catch http://ijbswa.sourceforge.net/config/...
817 * and http://ijbswa.sf.net/config/...
819 basename = http->path + 7;
824 /* Don't want to intercept */
828 /* We have intercepted it. */
830 /* remove any leading slash */
831 if (*basename == '/')
836 log_error(LOG_LEVEL_GPC, "%s%s intercepted!", http->hostport, http->path);
837 log_error(LOG_LEVEL_CLF, "%s - - [%T] \"%s\" 200 3",
838 csp->ip_addr_str, http->cmd);
840 for (v = intercept_patterns; v->str; v++)
842 if (strncmp(basename, v->str, v->len) == 0)
844 char * p = ((v->interceptor)(http, csp));
848 /* Send HTML redirection result */
849 write_socket(csp->cfd, p, strlen(p));
857 write_socket(csp->cfd, C_HOME_PAGE, strlen(C_HOME_PAGE));
862 #ifdef FAST_REDIRECTS
863 /*********************************************************************
865 * Function : redirect_url
867 * Description : Checks for redirection URLs and returns a HTTP redirect
868 * to the destination URL.
871 * 1 : http = http_request request, check `basename's of blocklist
872 * 2 : csp = Current client state (buffers, headers, etc...)
874 * Returns : NULL if URL was clean, HTTP redirect otherwise.
876 *********************************************************************/
877 char *redirect_url(struct http_request *http, struct client_state *csp)
881 p = q = csp->http->path;
882 log_error(LOG_LEVEL_REDIRECTS, "checking path: %s", p);
884 /* find the last URL encoded in the request */
885 while (p = strstr(p, "http://"))
890 /* if there was any, generate and return a HTTP redirect */
891 if (q != csp->http->path)
893 log_error(LOG_LEVEL_REDIRECTS, "redirecting to: %s", q);
895 p = (char *)malloc(strlen(HTTP_REDIRECT_TEMPLATE) + strlen(q));
896 sprintf(p, HTTP_REDIRECT_TEMPLATE, q);
905 #endif /* def FAST_REDIRECTS */
907 /*********************************************************************
909 * Function : url_permissions
911 * Description : Gets the permissions for this URL.
914 * 1 : http = http_request request for blocked URLs
915 * 2 : csp = Current client state (buffers, headers, etc...)
917 * Returns : permissions bitmask specifying what this URL can do.
918 * If not on list, will be default_permissions.
920 *********************************************************************/
921 int url_permissions(struct http_request *http, struct client_state *csp)
923 struct file_list *fl;
924 struct permissions_spec *b;
925 struct url_spec url[1];
926 int permissions = csp->config->default_permissions;
928 if (((fl = csp->permissions_list) == NULL) || ((b = fl->f) == NULL))
933 *url = dsplit(http->host);
935 /* if splitting the domain fails, punt */
936 if (url->dbuf == NULL)
941 for (b = b->next; NULL != b; b = b->next)
943 if ((b->url->port == 0) || (b->url->port == http->port))
945 if ((b->url->domain[0] == '\0') || (domaincmp(b->url, url) == 0))
947 if ((b->url->path == NULL) ||
949 (regexec(b->url->preg, http->path, 0, NULL, 0) == 0)
951 (strncmp(b->url->path, http->path, b->url->pathlen) == 0)
955 permissions &= b->mask;
956 permissions |= b->add;
969 /*********************************************************************
971 * Function : forward_url
973 * Description : Should we forward this to another proxy?
976 * 1 : http = http_request request for current URL
977 * 2 : csp = Current client state (buffers, headers, etc...)
979 * Returns : Return gw_default for no forward match,
980 * else a gateway pointer to a specific forwarding proxy.
982 *********************************************************************/
983 const struct gateway *forward_url(struct http_request *http, struct client_state *csp)
985 struct file_list *fl;
986 struct forward_spec *b;
987 struct url_spec url[1];
989 if (((fl = csp->flist) == NULL) || ((b = fl->f) == NULL))
994 *url = dsplit(http->host);
996 /* if splitting the domain fails, punt */
997 if (url->dbuf == NULL) return(gw_default);
999 for (b = b->next; b ; b = b->next)
1001 if ((b->url->port == 0) || (b->url->port == http->port))
1003 if ((b->url->domain[0] == '\0') || (domaincmp(b->url, url) == 0))
1005 if ((b->url->path == NULL) ||
1007 (regexec(b->url->preg, http->path, 0, NULL, 0) == 0)
1009 (strncmp(b->url->path, http->path, b->url->pathlen) == 0)
1028 /*********************************************************************
1032 * Description : Takes a domain and returns (by value) a url_spec
1033 * structure populated with dbuf, dcnt and dvec. The
1034 * other fields in the structure that is returned are zero.
1037 * 1 : domain = a URL address
1039 * Returns : url_spec structure populated with dbuf, dcnt and dvec.
1041 *********************************************************************/
1042 struct url_spec dsplit(char *domain)
1044 struct url_spec ret[1];
1049 memset(ret, '\0', sizeof(*ret));
1051 ret->unanchored = (domain[strlen(domain) - 1] == '.');
1053 ret->dbuf = strdup(domain);
1055 /* map to lower case */
1056 for (p = ret->dbuf; *p ; p++) *p = tolower(*p);
1058 /* split the domain name into components */
1059 ret->dcnt = ssplit(ret->dbuf, ".", v, SZ(v), 1, 1);
1063 memset(ret, '\0', sizeof(ret));
1067 /* save a copy of the pointers in dvec */
1068 size = ret->dcnt * sizeof(*ret->dvec);
1070 if ((ret->dvec = (char **)malloc(size)))
1072 memcpy(ret->dvec, v, size);
1081 /*********************************************************************
1083 * Function : domaincmp
1085 * Description : Compare domain names.
1086 * domaincmp("a.b.c", "a.b.c") => 0 (MATCH)
1087 * domaincmp("a*.b.c", "a.b.c") => 0 (MATCH)
1088 * domaincmp("a*.b.c", "abc.b.c") => 0 (MATCH)
1089 * domaincmp("a*c.b.c","abbc.b.c") => 0 (MATCH)
1090 * domaincmp("*a.b.c", "dabc.b.c") => 0 (MATCH)
1091 * domaincmp("b.c" , "a.b.c") => 0 (MATCH)
1092 * domaincmp("a.b" , "a.b.c") => 1 (DIFF)
1093 * domaincmp("a.b." , "a.b.c") => 0 (MATCH)
1094 * domaincmp("" , "a.b.c") => 0 (MATCH)
1096 * FIXME: I need a definition!
1099 * 1 : pattern = a domain that may contain a '*' as a wildcard.
1100 * 2 : fqdn = domain name against which the patterns are compared.
1102 * Returns : 0 => domains are equivalent, else no match.
1104 *********************************************************************/
1105 int domaincmp(struct url_spec *pattern, struct url_spec *fqdn)
1107 char **pv, **fv; /* vectors */
1108 int pn, fn; /* counters */
1109 char *p, *f; /* chars */
1115 while (fn < fqdn->dcnt && pn < pattern->dcnt)
1120 if (trivimatch(p, f))
1134 return ((pn < pattern->dcnt) || ((fn < fqdn->dcnt) && !pattern->unanchored));
1139 /* intercept functions */
1141 /*********************************************************************
1143 * Function : show_proxy_args
1145 * Description : This "crunch"es "http://any.thing/show-proxy-args" and
1146 * returns a web page describing the current status of IJB.
1149 * 1 : http = ignored
1150 * 2 : csp = Current client state (buffers, headers, etc...)
1152 * Returns : A string that contains the current status of IJB.
1154 *********************************************************************/
1155 char *show_proxy_args(struct http_request *http, struct client_state *csp)
1159 #ifdef SPLIT_PROXY_ARGS
1163 const char * filename = NULL;
1164 const char * file_description = NULL;
1165 char * query_string = strrchr(http->path, '?');
1166 char which_file = '\0';
1169 if (query_string != NULL)
1171 /* first char past the last '?' (maybe '\0')*/
1172 which_file = query_string[1];
1177 if (csp->permissions_list)
1179 filename = csp->permissions_list->filename;
1180 file_description = "Permissions List";
1186 filename = csp->flist->filename;
1187 file_description = "Forward List";
1195 filename = csp->alist->filename;
1196 file_description = "Access Control List";
1199 #endif /* def ACL_FILES */
1205 filename = csp->rlist->filename;
1206 file_description = "RE Filter List";
1209 #endif /* def PCRS */
1215 filename = csp->tlist->filename;
1216 file_description = "Trust List";
1219 #endif /* def TRUST_FILES */
1224 /* Display specified file */
1225 /* FIXME: Add HTTP headers so this isn't cached */
1228 "Server: IJ/" VERSION "\n"
1229 "Content-type: text/html\n"
1230 "Pragma: no-cache\n"
1231 "Last-Modified: Thu Jul 31, 1997 07:42:22 pm GMT\n"
1232 "Expires: Thu Jul 31, 1997 07:42:22 pm GMT\n"
1237 "<title>Internet Junkbuster Proxy Status - ");
1238 s = strsav(s, file_description);
1242 "<body bgcolor=\"#f8f8f0\" link=\"#000078\" alink=\"#ff0022\" vlink=\"#787878\">\n"
1244 "<h1>" BANNER "\n");
1245 s = strsav(s, file_description);
1248 "<p><a href=\"show-proxy-args\">Back to proxy status</a></p>\n"
1250 s = strsav(s, file_description);
1253 "Contents of file "<code>");
1254 p = html_encode(filename);
1258 "</code>":<br>\n"
1262 if ((fp = fopen(filename, "r")) == NULL)
1264 s = strsav(s, "</pre><h1>ERROR OPENING FILE!</h1><pre>");
1268 while (fgets(buf, sizeof(buf), fp))
1270 p = html_encode(buf);
1275 s = strsav(s, "<br>");
1284 "<p><a href=\"show-proxy-args\">Back to proxy status</a></p>\n"
1286 "<small><small><p>\n"
1287 "The " BANNER " Proxy - \n"
1288 "<a href=\"" HOME_PAGE_URL "\">" HOME_PAGE_URL "</a>\n"
1290 "</body></html>\n");
1293 #endif /* def SPLIT_PROXY_ARGS */
1295 s = strsav(s, csp->config->proxy_args_header);
1296 s = strsav(s, csp->config->proxy_args_invocation);
1299 #endif /* def STATISTICS */
1300 s = strsav(s, csp->config->proxy_args_gateways);
1302 #ifdef SPLIT_PROXY_ARGS
1304 "<h2>The following files are in use:</h2>\n"
1305 "<p>(Click a filename to view it)</p>\n"
1308 if (csp->permissions_list)
1310 s = strsav(s, "<li>Permissions List: <a href=\"show-proxy-args?permit\"><code>");
1311 s = strsav(s, csp->permissions_list->filename);
1312 s = strsav(s, "</code></a></li>\n");
1317 s = strsav(s, "<li>Forward List: <a href=\"show-proxy-args?forward\"><code>");
1318 s = strsav(s, csp->flist->filename);
1319 s = strsav(s, "</code></a></li>\n");
1325 s = strsav(s, "<li>Access Control List: <a href=\"show-proxy-args?acl\"><code>");
1326 s = strsav(s, csp->alist->filename);
1327 s = strsav(s, "</code></a></li>\n");
1329 #endif /* def ACL_FILES */
1334 s = strsav(s, "<li>RE Filter List: <a href=\"show-proxy-args?re\"><code>");
1335 s = strsav(s, csp->rlist->filename);
1336 s = strsav(s, "</code></a></li>\n");
1338 #endif /* def PCRS */
1343 s = strsav(s, "<li>Trust List: <a href=\"show-proxy-args?trust\"><code>");
1344 s = strsav(s, csp->tlist->filename);
1345 s = strsav(s, "</code></a></li>\n");
1347 #endif /* def TRUST_FILES */
1349 s = strsav(s, "</ul>");
1351 #else /* ifndef SPLIT_PROXY_ARGS */
1354 s = strsav(s, csp->clist->proxy_args);
1359 s = strsav(s, csp->flist->proxy_args);
1365 s = strsav(s, csp->alist->proxy_args);
1367 #endif /* def ACL_FILES */
1372 s = strsav(s, csp->rlist->proxy_args);
1374 #endif /* def PCRS */
1379 s = strsav(s, csp->tlist->proxy_args);
1381 #endif /* def TRUST_FILES */
1383 #endif /* ndef SPLIT_PROXY_ARGS */
1385 s = strsav(s, csp->config->proxy_args_trailer);
1392 static const char C_URL_INFO_HEADER[] =
1394 "Pragma: no-cache\n"
1395 "Expires: Thu Jul 31, 1997 07:42:22 pm GMT\n"
1396 "Content-Type: text/html\n\n"
1399 "<title>Internet Junkbuster: URL Info</title>\n"
1405 "<p>Information for: <a href=\"http://%s\">http://%s</a></p>\n";
1406 static const char C_URL_INFO_FOOTER[] =
1411 static const char C_URL_INFO_FORM[] =
1413 "Pragma: no-cache\n"
1414 "Expires: Thu Jul 31, 1997 07:42:22 pm GMT\n"
1415 "Content-Type: text/html\n\n"
1418 "<title>Internet Junkbuster: URL Info</title>\n"
1424 "<form method=\"GET\" action=\"http://i.j.b/show-url-info\">\n"
1425 "<p>Please enter a URL, without the leading "http://":</p>"
1426 "<p><input type=\"text\" name=\"url\" size=\"80\">"
1427 "<input type=\"submit\" value=\"Info\"></p>\n"
1433 /*********************************************************************
1435 * Function : permissions_to_text
1437 * Description : Converts a permissionsfile entry from numeric form
1438 * ("mask" and "add") to text.
1441 * 1 : mask = As from struct permissions_spec
1442 * 2 : add = As from struct permissions_spec
1444 * Returns : A string. Caller must free it.
1446 *********************************************************************/
1447 char * permissions_to_text(unsigned mask, unsigned add)
1449 char * result = strdup("");
1451 /* sanity - prevents "-feature +feature" */
1454 #define PERMISSION_TO_TEXT(__bit, __name) \
1455 if (!(mask & __bit)) \
1457 result = strsav(result, " -" __name); \
1459 else if (add & __bit) \
1461 result = strsav(result, " +" __name); \
1464 PERMISSION_TO_TEXT(PERMIT_COOKIE_SET, "cookies-set");
1465 PERMISSION_TO_TEXT(PERMIT_COOKIE_READ, "cookies-read");
1466 PERMISSION_TO_TEXT(PERMIT_RE_FILTER, "filter");
1467 PERMISSION_TO_TEXT(PERMIT_POPUPS, "popup");
1468 PERMISSION_TO_TEXT(PERMIT_REFERER, "referer");
1469 PERMISSION_TO_TEXT(PERMIT_FAST_REDIRECTS, "fast-redirects");
1470 PERMISSION_TO_TEXT(PERMIT_BLOCK, "block");
1471 PERMISSION_TO_TEXT(PERMIT_IMAGE, "image");
1477 /*********************************************************************
1479 * Function : ijb_show_url_info
1481 * Description : Builds an HTML page listing the permissions patterns that match a URL.
1484 * 1 : http = http_request request for crunched URL
1485 * 2 : csp = Current client state (buffers, headers, etc...)
1487 * Returns : An allocated string containing an HTML page.
1489 *********************************************************************/
1490 char *ijb_show_url_info(struct http_request *http, struct client_state *csp)
1492 char * query_string = strchr(http->path, '?');
1495 if (query_string != NULL)
1497 query_string = url_decode(query_string + 1);
1498 if (strncmpic(query_string, "url=", 4) == 0)
1500 host = strdup(query_string + 4);
1502 freez(query_string);
1510 struct file_list *fl;
1511 struct permissions_spec *b;
1512 struct url_spec url[1];
1513 int permissions = csp->config->default_permissions;
1515 result = (char *)malloc(sizeof(C_URL_INFO_HEADER) + 2 * strlen(host));
1516 sprintf(result, C_URL_INFO_HEADER, host, host);
1518 s = permissions_to_text(permissions, permissions);
1519 result = strsav(result, "<h3>Defaults:</h3>\n<p><b>{");
1520 result = strsav(result, s);
1521 result = strsav(result, " }</b></p>\n<h3>Patterns affecting the URL:</h3>\n<p>\n");
1524 s = strchr(host, '/');
1534 s = strchr(host, ':');
1541 if (((fl = csp->permissions_list) == NULL) || ((b = fl->f) == NULL))
1545 result = strsav(result, C_URL_INFO_FOOTER);
1549 *url = dsplit(host);
1551 /* if splitting the domain fails, punt */
1552 if (url->dbuf == NULL)
1556 result = strsav(result, C_URL_INFO_FOOTER);
1560 for (b = b->next; NULL != b; b = b->next)
1562 if ((b->url->port == 0) || (b->url->port == port))
1564 if ((b->url->domain[0] == '\0') || (domaincmp(b->url, url) == 0))
1566 if ((b->url->path == NULL) ||
1568 (regexec(b->url->preg, path, 0, NULL, 0) == 0)
1570 (strncmp(b->url->path, path, b->url->pathlen) == 0)
1574 s = permissions_to_text(b->mask, b->add);
1575 result = strsav(result, "<b>{");
1576 result = strsav(result, s);
1577 result = strsav(result, " }</b><br>\n<code>");
1578 result = strsav(result, b->url->spec);
1579 result = strsav(result, "</code><br>\n<br>\n");
1581 permissions &= b->mask;
1582 permissions |= b->add;
1594 s = permissions_to_text(permissions, permissions);
1595 result = strsav(result, "</p>\n<h2>Final Results:</h2>\n<p><b>{");
1596 result = strsav(result, s);
1597 result = strsav(result, " }</b><br>\n<br>\n");
1600 result = strsav(result, C_URL_INFO_FOOTER);
1605 return strdup(C_URL_INFO_FORM);
1610 /*********************************************************************
1612 * Function : ijb_send_banner
1614 * Description : This "crunch"es "http://i.j.b/ijb-send-banner" and
 * writes the contents of JBGIF straight to the client
 * socket (csp->cfd). The http argument is not used by
 * the write.
1618 * 1 : http = http_request request for crunched URL
1619 * 2 : csp = Current client state (buffers, headers, etc...)
1621 * Returns : NULL, indicating that it has already sent the data.
1623 *********************************************************************/
1624 char *ijb_send_banner(struct http_request *http, struct client_state *csp)
/*
 * Send all of JBGIF except its last byte (presumably the terminating NUL of
 * a string literal - TODO confirm).
 * NOTE(review): the write_socket return value is not examined here.
 */
1626 write_socket(csp->cfd, JBGIF, sizeof(JBGIF)-1);
1632 /*********************************************************************
1634 * Function : ij_untrusted_url
1636 * Description : This "crunch"es "http://any.thing/ij-untrusted-url" and
1637 * returns a web page describing why it was untrusted.
 * The page names the requested host and path, quotes the
 * referrer, lists the spec of every entry in
 * csp->config->trust_list and, when trust_info has
 * entries, links to further documents.
1640 * 1 : http = http_request request for crunched URL
1641 * 2 : csp = Current client state (buffers, headers, etc...)
1643 * Returns : A string that contains why this was untrusted.
1645 *********************************************************************/
1646 char *ij_untrusted_url(struct http_request *http, struct client_state *csp)
1649 char *hostport, *path, *refer, *p, *v[9];
1651 struct url_spec **tl, *t;
/*
 * Response template: HTTP status line and headers followed by the start of
 * the HTML body. Its two %s conversions receive hostport and path.
 * NOTE(review): only the status line ends in "\r\n"; the remaining header
 * lines end in a bare "\n" - confirm clients accept this.
 */
1654 static const char format[] =
1655 "HTTP/1.0 200 OK\r\n"
1656 "Pragma: no-cache\n"
1657 "Last-Modified: Thu Jul 31, 1997 07:42:22 pm GMT\n"
1658 "Expires: Thu Jul 31, 1997 07:42:22 pm GMT\n"
1659 "Content-Type: text/html\n\n"
1662 "<title>Internet Junkbuster: Request for untrusted URL</title>\n"
1668 "The " BANNER " Proxy "
1669 "<A href=\"" HOME_PAGE_URL "\">"
1670 "(" HOME_PAGE_URL ") </A>"
1671 "intercepted the request for %s%s\n"
1672 "because the URL is not trusted.\n"
/*
 * Split the request path on the characters in "?+". Exactly four pieces are
 * required; pieces 1 to 3 are decoded as host:port, path and referrer
 * (ssplit is a project helper; its exact splitting rules are assumed -
 * TODO confirm).
 */
1675 if ((n = ssplit(http->path, "?+", v, SZ(v), 0, 0)) == 4)
1677 hostport = url_decode(v[1]);
1678 path = url_decode(v[2]);
1679 refer = url_decode(v[3]);
/* Placeholder values (presumably used when the split did not give four pieces - TODO confirm). */
1683 hostport = strdup("undefined_host");
1684 path = strdup("/undefined_path");
1685 refer = strdup("undefined");
/* n accumulates the buffer size needed for the formatted template. */
1689 n += strlen(hostport);
1692 if ((p = (char *)malloc(n)))
1694 sprintf(p, format, hostport, path);
/*
 * NOTE(review): the two strsav calls below discard their return value,
 * while every later call assigns it back to p. If strsav can move the
 * buffer, p is left stale here - confirm and fix.
 */
1697 strsav(p, "The referrer in this request was <strong>");
1699 strsav(p, "</strong><br>\n");
1705 p = strsav(p, "<h3>The following referrers are trusted</h3>\n");
/* trust_list is walked as an array of pointers that ends at the first NULL entry. */
1707 for (tl = csp->config->trust_list; (t = *tl) ; tl++)
/* NOTE(review): sprintf into buf is unbounded; confirm buf is large enough for any t->spec. */
1709 sprintf(buf, "%s<br>\n", t->spec);
/* Only emit the documents section when trust_info has at least one entry after its first node. */
1713 if (csp->config->trust_info->next)
1719 "You can learn more about what this means "
1720 "and what you may be able to do about it by "
1721 "reading the following documents:<br>\n"
/* One list item per trust_info entry, starting at the node after the first. */
1727 for (l = csp->config->trust_info->next; l ; l = l->next)
1730 "<li> <a href=%s>%s</a><br>\n",
1735 p = strsav(p, "</ol>\n");
/* Close the HTML document. */
1738 p = strsav(p, "</body>\n" "</html>\n");
1743 #endif /* def TRUST_FILES */
1747 /*********************************************************************
1749 * Function : add_stats
1751 * Description : Statistics function of JB. Called by `show_proxy_args'.
 * Appends a statistics section to s: a heading, then
 * either "No activity so far!" when no URLs have been
 * read, or the request counts and the percentage of
 * requests that were rejected.
1754 * 1 : s = string that holds the proxy args description page
1756 * Returns : A pointer to the descriptive status web page.
 * s is reassigned from strsav() on every append, so
 * callers should use the returned pointer.
1758 *********************************************************************/
1759 char *add_stats(char *s)
1762 * Output details of the number of requests rejected and
1763 * accepted. This is switchable in the junkbuster config.
1764 * Does nothing if this option is not enabled.
1767 float perc_rej; /* Percentage of http requests rejected */
/* Copy the global counters into locals once (presumably so the counts and the percentage come from the same snapshot - TODO confirm). */
1769 int local_urls_read = urls_read;
1770 int local_urls_rejected = urls_rejected;
1773 * Need to alter the stats not to include the fetch of this
1776 * Can't do following thread safely! doh!
1779 * urls_rejected--; * This will be incremented subsequently *
1782 s = strsav(s,"<h2>Statistics for this " BANNER ":</h2>\n");
/* Zero reads gets its own message; the division below is only meaningful for a non-zero count. */
1784 if (local_urls_read == 0)
1787 s = strsav(s,"No activity so far!\n");
/* Rejected requests as a percentage of all requests read, computed in float. */
1793 perc_rej = (float)local_urls_rejected * 100.0F /
1794 (float)local_urls_read;
1797 "%d requests received, %d filtered "
1800 local_urls_rejected, perc_rej);
/* Append the formatted statistics line to the page. */
1802 s = strsav(s,out_str);
1807 #endif /* def STATISTICS */