1 const char filters_rcs[] = "$Id: filters.c,v 1.13 2001/05/31 21:21:30 jongfoster Exp $";
2 /*********************************************************************
4 * File : $Source: /cvsroot/ijbswa/current/filters.c,v $
6 * Purpose : Declares functions to parse/crunch headers and pages.
7 * Functions declared include:
8 * `acl_addr', `add_stats', `block_acl', `block_imageurl',
9 * `block_url', `url_actions', `domaincmp', `dsplit',
10 * `filter_popups', `forward_url', `redirect_url',
11 * `ij_untrusted_url', `intercept_url', `re_process_buffer',
12 * `show_proxy_args', `ijb_send_banner', and `trust_url'
14 * Copyright : Written by and Copyright (C) 2001 the SourceForge
15 * IJBSWA team. http://ijbswa.sourceforge.net
17 * Based on the Internet Junkbuster originally written
18 * by and Copyright (C) 1997 Anonymous Coders and
19 * Junkbusters Corporation. http://www.junkbusters.com
21 * This program is free software; you can redistribute it
22 * and/or modify it under the terms of the GNU General
23 * Public License as published by the Free Software
24 * Foundation; either version 2 of the License, or (at
25 * your option) any later version.
27 * This program is distributed in the hope that it will
28 * be useful, but WITHOUT ANY WARRANTY; without even the
29 * implied warranty of MERCHANTABILITY or FITNESS FOR A
30 * PARTICULAR PURPOSE. See the GNU General Public
31 * License for more details.
33 * The GNU General Public License should be included with
34 * this file. If not, you can view it at
35 * http://www.gnu.org/copyleft/gpl.html
36 * or write to the Free Software Foundation, Inc., 59
37 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
41 * Revision 1.13 2001/05/31 21:21:30 jongfoster
42 * Permissionsfile / actions file changes:
43 * - Changed "permission" to "action" throughout
44 * - changes to file format to allow string parameters
45 * - Moved helper functions to actions.c
47 * Revision 1.12 2001/05/31 17:35:20 oes
49 * - Enhanced domain part globbing with infix and prefix asterisk
50 * matching and optional unanchored operation
52 * Revision 1.11 2001/05/29 11:53:23 oes
53 * "See why" link added to "blocked" page
55 * Revision 1.10 2001/05/29 09:50:24 jongfoster
56 * Unified blocklist/imagelist/permissionslist.
57 * File format is still under discussion, but the internal changes
60 * Also modified interceptor behaviour:
61 * - We now intercept all URLs beginning with one of the following
62 * prefixes (and *only* these prefixes):
64 * * http://ijbswa.sf.net/config/
65 * * http://ijbswa.sourceforge.net/config/
66 * - New interceptors "home page" - go to http://i.j.b/ to see it.
67 * - Internal changes so that intercepted and fast redirect pages
68 * are not replaced with an image.
69 * - Interceptors now have the option to send a binary page direct
70 * to the client. (i.e. ijb-send-banner uses this)
71 * - Implemented show-url-info interceptor. (Which is why I needed
72 * the above interceptors changes - a typical URL is
73 * "http://i.j.b/show-url-info?url=www.somesite.com/banner.gif".
74 * The previous mechanism would not have intercepted that, and
75 * if it had been intercepted then it would have replaced
78 * Revision 1.9 2001/05/27 22:17:04 oes
80 * - re_process_buffer no longer writes the modified buffer
81 * to the client, which was very ugly. It now returns the
82 * buffer, which is then written by chat.
84 * - content_length now adjusts the Content-Length: header
85 * for modified documents rather than crunch()ing it.
86 * (Length info in csp->content_length, which is 0 for
87 * unmodified documents)
89 * - For this to work, sed() is called twice when filtering.
91 * Revision 1.8 2001/05/26 17:13:28 jongfoster
92 * Filled in a function comment.
94 * Revision 1.7 2001/05/26 15:26:15 jongfoster
95 * ACL feature now provides more security by immediately dropping
96 * connections from untrusted hosts.
98 * Revision 1.6 2001/05/26 00:28:36 jongfoster
99 * Automatic reloading of config file.
100 * Removed obsolete SIGHUP support (Unix) and Reload menu option (Win32).
101 * Most of the global variables have been moved to a new
102 * struct configuration_spec, accessed through csp->config->globalname
103 * Most of the globals remaining are used by the Win32 GUI.
105 * Revision 1.5 2001/05/25 22:34:30 jongfoster
108 * Revision 1.4 2001/05/22 18:46:04 oes
110 * - Enabled filtering banners by size rather than URL
111 * by adding patterns that replace all standard banner
112 * sizes with the "Junkbuster" gif to the re_filterfile
114 * - Enabled filtering WebBugs by providing a pattern
115 * which kills all 1x1 images
117 * - Added support for PCRE_UNGREEDY behaviour to pcrs,
118 * which is selected by the (nonstandard and therefore
119 * capital) letter 'U' in the option string.
120 * It causes the quantifiers to be ungreedy by default.
121 * Appending a ? turns back to greedy (!).
123 * - Added a new interceptor ijb-send-banner, which
124 * sends back the "Junkbuster" gif. Without imagelist or
125 * MSIE detection support, or if tinygif = 1, or the
126 * URL isn't recognized as an imageurl, a lame HTML
127 * explanation is sent instead.
129 * - Added new feature, which permits blocking remote
130 * script redirects and firing back a local redirect
132 * The feature is conditionally compiled, i.e. it
133 * can be disabled with --disable-fast-redirects,
134 * plus it must be activated by a "fast-redirects"
135 * line in the config file, has its own log level
136 * and of course wants to be displayed by show-proxy-args
137 * Note: Boy, all the #ifdefs in 1001 locations and
138 * all the fumbling with configure.in and acconfig.h
139 * were *way* more work than the feature itself :-(
141 * - Because a generic redirect template was needed for
142 * this, tinygif = 3 now uses the same.
144 * - Moved GIFs, and other static HTTP response templates
149 * - Removed some >400 CRs again (Jon, you really worked
152 * Revision 1.3 2001/05/20 16:44:47 jongfoster
153 * Removing last hardcoded JunkBusters.com URLs.
155 * Revision 1.2 2001/05/20 01:21:20 jongfoster
156 * Version 2.9.4 checkin.
157 * - Merged popupfile and cookiefile, and added control over PCRS
158 * filtering, in new "permissionsfile".
159 * - Implemented LOG_LEVEL_FATAL, so that if there is a configuration
160 * file error you now get a message box (in the Win32 GUI) rather
161 * than the program exiting with no explanation.
162 * - Made killpopup use the PCRS MIME-type checking and HTTP-header
164 * - Removed tabs from "config"
165 * - Moved duplicated url parsing code in "loaders.c" to a new function.
166 * - Bumped up version number.
168 * Revision 1.1.1.1 2001/05/15 13:58:52 oes
169 * Initial import of version 2.9.3 source tree
172 *********************************************************************/
178 #include <sys/types.h>
185 #include <netinet/in.h>
187 #include <winsock2.h>
194 #include "showargs.h"
198 #include "jbsockets.h"
200 #include "jbsockets.h"
201 #include "miscutil.h"
208 const char filters_h_rcs[] = FILTERS_H_VERSION;
210 /* Fix a problem with Solaris. There should be no effect on other
212 * Solaris's isspace() is a macro which uses its argument directly
213 * as an array index. Therefore we need to make sure that high-bit
214 * characters generate +ve values, and ideally we also want to make
215 * the argument match the declared parameter type of "int".
217 #define ijb_isdigit(__X) isdigit((int)(unsigned char)(__X))
220 static const char CBLOCK[] =
222 "HTTP/1.0 403 Request for blocked URL\n"
223 #else /* ifndef AMIGA */
224 "HTTP/1.0 202 Request for blocked URL\n"
225 #endif /* ndef AMIGA */
227 "Last-Modified: Thu Jul 31, 1997 07:42:22 pm GMT\n"
228 "Expires: Thu Jul 31, 1997 07:42:22 pm GMT\n"
229 "Content-Type: text/html\n\n"
232 "<title>Internet Junkbuster: Request for blocked URL</title>\n"
238 "<p align=center>Your request for <b>%s%s</b>\n"
239 "was blocked.<br><a href=\"http://i.j.b/show-url-info?url=%s%s\">See why</a>"
241 " or <a href=\"http://%s" FORCE_PREFIX "%s\">"
242 "go there anyway.</a>"
243 #endif /* def FORCE_LOAD */
249 static const char CTRUST[] =
251 "HTTP/1.0 403 Request for untrusted URL\n"
252 #else /* ifndef AMIGA */
253 "HTTP/1.0 202 Request for untrusted URL\n"
254 #endif /* ndef AMIGA */
256 "Last-Modified: Thu Jul 31, 1997 07:42:22 pm GMT\n"
257 "Expires: Thu Jul 31, 1997 07:42:22 pm GMT\n"
258 "Content-Type: text/html\n\n"
261 "<title>Internet Junkbuster: Request for untrusted URL</title>\n"
265 "<a href=http://i.j.b/ij-untrusted-url?%s+%s+%s>"
271 #endif /* def TRUST_FILES */
275 /*********************************************************************
277 * Function : block_acl
279 * Description : Block this request?
280 * Decide yes or no based on ACL file.
283 * 1 : dst = The proxy or gateway address this is going to.
284 * Or NULL to check all possible targets.
285 * 2 : csp = Current client state (buffers, headers, etc...)
286 * Also includes the client IP address.
288 * Returns : 0 = FALSE (don't block) and 1 = TRUE (do block)
290 *********************************************************************/
291 int block_acl(struct access_control_addr *dst,
292 struct client_state *csp)
294 struct file_list *fl;
295 struct access_control_list *a, *acl;
297 /* if not using an access control list, then permit the connection */
298 if (((fl = csp->alist) == NULL) ||
299 ((acl = (struct access_control_list *) fl->f) == NULL))
304 /* search the list */
305 for (a = acl->next ; a ; a = a->next)
307 if ((csp->ip_addr_long & a->src->mask) == a->src->addr)
311 /* Just want to check if they have any access */
312 if (a->action == ACL_PERMIT)
317 else if ( ((dst->addr & a->dst->mask) == a->dst->addr)
318 && ((dst->port == a->dst->port) || (a->dst->port == 0)))
320 if (a->action == ACL_PERMIT)
337 /*********************************************************************
339 * Function : acl_addr
341 * Description : Called from `load_aclfile' to parse an ACL address.
344 * 1 : aspec = String specifying ACL address.
345 * 2 : aca = struct access_control_addr to fill in.
347 * Returns : 0 => Ok, everything else is an error.
349 *********************************************************************/
350 int acl_addr(char *aspec, struct access_control_addr *aca)
352 int i, masklength, port;
358 if ((p = strchr(aspec, '/')))
362 if (ijb_isdigit(*p) == 0)
366 masklength = atoi(p);
369 if ((masklength < 0) || (masklength > 32))
374 if ((p = strchr(aspec, ':')))
378 if (ijb_isdigit(*p) == 0)
387 aca->addr = ntohl(resolve_hostname_to_ip(aspec));
391 log_error(LOG_LEVEL_ERROR, "can't resolve address for %s", aspec);
395 /* build the netmask */
397 for (i=1; i <= masklength ; i++)
399 aca->mask |= (1 << (32 - i));
402 /* now mask off the host portion of the ip address
403 * (i.e. save only the network portion of the address).
405 aca->addr = aca->addr & aca->mask;
410 #endif /* def ACL_FILES */
413 /*********************************************************************
415 * Function : block_url
417 * Description : Called from `chat'. Check to see if we need to block this.
420 * 1 : http = http_request request to "check" for blocked
421 * 2 : csp = Current client state (buffers, headers, etc...)
423 * Returns : NULL => unblocked, else string to HTML block description.
425 *********************************************************************/
426 char *block_url(struct http_request *http, struct client_state *csp)
432 if ((csp->action->flags & ACTION_BLOCK) == 0)
440 #endif /* def FORCE_LOAD */
443 n += factor * strlen(http->hostport);
444 n += factor * strlen(http->path);
446 p = (char *)malloc(n);
449 sprintf(p, CBLOCK, http->hostport, http->path, http->hostport, http->path,
450 http->hostport, http->path);
452 sprintf(p, CBLOCK, http->hostport, http->path, http->hostport, http->path);
453 #endif /* def FORCE_LOAD */
460 #ifdef IMAGE_BLOCKING
461 /*********************************************************************
463 * Function : block_imageurl
465 * Description : Given a URL which is blocked, decide whether to
466 * send the "blocked" image or HTML.
469 * 1 : http = URL to check.
470 * 2 : csp = Current client state (buffers, headers, etc...)
472 * Returns : True (nonzero) if URL is in image list, false (0)
475 *********************************************************************/
476 int block_imageurl(struct http_request *http, struct client_state *csp)
478 #ifdef DETECT_MSIE_IMAGES
479 if ((csp->accept_types
480 & (ACCEPT_TYPE_IS_MSIE|ACCEPT_TYPE_MSIE_IMAGE|ACCEPT_TYPE_MSIE_HTML))
481 == (ACCEPT_TYPE_IS_MSIE|ACCEPT_TYPE_MSIE_IMAGE))
485 else if ((csp->accept_types
486 & (ACCEPT_TYPE_IS_MSIE|ACCEPT_TYPE_MSIE_IMAGE|ACCEPT_TYPE_MSIE_HTML))
487 == (ACCEPT_TYPE_IS_MSIE|ACCEPT_TYPE_MSIE_HTML))
493 return ((csp->action->flags & ACTION_IMAGE) != 0);
495 #endif /* def IMAGE_BLOCKING */
499 /*********************************************************************
501 * Function : re_process_buffer
503 * Description : Apply all jobs from the joblist (aka. Perl regexp's) to
504 * the text buffer that's been accumulated in csp->iob->buf
505 * and set csp->content_length to the modified size.
508 * 1 : csp = Current client state (buffers, headers, etc...)
510 * Returns : a pointer to the (newly allocated) modified buffer.
513 *********************************************************************/
514 char *re_process_buffer(struct client_state *csp)
517 int size = csp->iob->eod - csp->iob->cur;
518 char *old=csp->iob->cur, *new = NULL;
519 pcrs_job *job, *joblist;
521 struct file_list *fl;
522 struct re_filterfile_spec *b;
524 /* Sanity first ;-) */
530 if ( ( NULL == (fl = csp->rlist) ) || ( NULL == (b = fl->f) ) )
532 log_error(LOG_LEVEL_ERROR, "Unable to get current state of regexp filtering.");
536 joblist = b->joblist;
539 log_error(LOG_LEVEL_RE_FILTER, "re_filtering %s%s (size %d) ...",
540 csp->http->hostport, csp->http->path, size);
542 /* Apply all jobs from the joblist */
543 for (job = joblist; NULL != job; job = job->next)
545 hits += pcrs_exec_substitution(job, old, size, &new, &size);
546 if (old != csp->iob->cur) free(old);
550 log_error(LOG_LEVEL_RE_FILTER, " produced %d hits (new size %d).", hits, size);
552 csp->content_length = size;
554 /* fwiw, reset the iob */
559 #endif /* def PCRS */
563 /*********************************************************************
565 * Function : trust_url
567 * Description : Should we "trust" this URL? See "trustfile" line in config.
570 * 1 : http = http_request request for requested URL
571 * 2 : csp = Current client state (buffers, headers, etc...)
573 * Returns : NULL => trusted, else string to HTML "untrusted" description.
575 *********************************************************************/
576 char *trust_url(struct http_request *http, struct client_state *csp)
578 struct file_list *fl;
579 struct block_spec *b;
580 struct url_spec url[1], **tl, *t;
582 char *hostport, *path, *refer;
583 struct http_request rhttp[1];
586 if (((fl = csp->tlist) == NULL) || ((b = fl->f) == NULL))
591 *url = dsplit(http->host);
593 /* if splitting the domain fails, punt */
594 if (url->dbuf == NULL) return(NULL);
596 memset(rhttp, '\0', sizeof(*rhttp));
598 for (b = b->next; b ; b = b->next)
600 if ((b->url->port == 0) || (b->url->port == http->port))
602 if ((b->url->domain[0] == '\0') || (domaincmp(b->url, url) == 0))
604 if ((b->url->path == NULL) ||
606 (regexec(b->url->preg, http->path, 0, NULL, 0) == 0)
608 (strncmp(b->url->path, http->path, b->url->pathlen) == 0)
615 if (b->reject == 0) return(NULL);
617 hostport = url_encode(http->hostport);
618 path = url_encode(http->path);
622 refer = url_encode(csp->referrer);
626 refer = url_encode("undefined");
630 n += strlen(hostport);
634 p = (char *)malloc(n);
636 sprintf(p, CTRUST, hostport, path, refer);
651 if ((csp->referrer == NULL)|| (strlen(csp->referrer) <= 9))
653 /* no referrer was supplied */
654 goto trust_url_not_trusted;
657 /* forge a URL from the referrer so we can use
658 * convert_url() to parse it into its components.
662 p = strsav(p, "GET ");
663 p = strsav(p, csp->referrer + 9); /* skip over "Referer: " */
664 p = strsav(p, " HTTP/1.0");
666 parse_http_request(p, rhttp, csp);
668 if (rhttp->cmd == NULL)
671 goto trust_url_not_trusted;
676 *url = dsplit(rhttp->host);
678 /* if splitting the domain fails, punt */
679 if (url->dbuf == NULL) goto trust_url_not_trusted;
681 for (tl = csp->config->trust_list; (t = *tl) ; tl++)
683 if ((t->port == 0) || (t->port == rhttp->port))
685 if ((t->domain[0] == '\0') || domaincmp(t, url) == 0)
687 if ((t->path == NULL) ||
689 (regexec(t->preg, rhttp->path, 0, NULL, 0) == 0)
691 (strncmp(t->path, rhttp->path, t->pathlen) == 0)
695 /* if the URL's referrer is from a trusted referrer, then
696 * add the target spec to the trustfile as an unblocked
697 * domain and return NULL (which means it's OK).
705 if ((fp = fopen(csp->config->trustfile, "a")))
710 h = strsav(h, http->hostport);
716 /* since this path points into a user's home space
717 * be sure to include this spec in the trustfile.
719 if ((p = strchr(p, '/')))
722 h = strsav(h, http->path);
727 free_http_request(rhttp);
729 fprintf(fp, "%s\n", h);
739 trust_url_not_trusted:
740 free_http_request(rhttp);
742 hostport = url_encode(http->hostport);
743 path = url_encode(http->path);
747 refer = url_encode(csp->referrer);
751 refer = url_encode("undefined");
755 n += strlen(hostport);
759 p = (char *)malloc(n);
760 sprintf(p, CTRUST, hostport, path, refer);
769 #endif /* def TRUST_FILES */
772 static const char C_HOME_PAGE[] =
775 "Expires: Thu Jul 31, 1997 07:42:22 pm GMT\n"
776 "Content-Type: text/html\n\n"
779 "<title>Internet Junkbuster: Information</title>\n"
785 "<p><a href=\"" HOME_PAGE_URL "\">JunkBuster web site</a></p>\n"
786 "<p><a href=\"http://i.j.b/show-proxy-arg\">Proxy configuration</a></p>\n"
787 "<p><a href=\"http://i.j.b/show-url-info\">Look up a URL</a></p>\n"
792 /*********************************************************************
794 * Function : intercept_url
796 * Description : checks the URL `basename' against a list of URLs to
797 * snarf. If it matches, it calls the associated function
798 * which returns an HTML page to send back to the client.
799 * Right now, we snarf:
800 * "show-proxy-args", and
801 * "ij-untrusted-url" (optional w/TRUST_FILES)
804 * 1 : http = http_request request, check `basename's of blocklist
805 * 2 : csp = Current client state (buffers, headers, etc...)
807 * Returns : 1 if it intercepts & handles the request.
809 *********************************************************************/
810 int intercept_url(struct http_request *http, struct client_state *csp)
812 char *basename = NULL;
813 const struct interceptors *v;
815 if (0 == strcmpic(http->host,"i.j.b"))
818 * Catch http://i.j.b/...
820 basename = http->path;
822 else if ( ( (0 == strcmpic(http->host,"ijbswa.sourceforge.net"))
823 || (0 == strcmpic(http->host,"ijbswa.sf.net")) )
824 && (0 == strncmpic(http->path,"/config", 7))
825 && ((http->path[7] == '/') || (http->path[7] == '\0')))
828 * Catch http://ijbswa.sourceforge.net/config/...
829 * and http://ijbswa.sf.net/config/...
831 basename = http->path + 7;
836 /* Don't want to intercept */
840 /* We have intercepted it. */
842 /* remove any leading slash */
843 if (*basename == '/')
848 log_error(LOG_LEVEL_GPC, "%s%s intercepted!", http->hostport, http->path);
849 log_error(LOG_LEVEL_CLF, "%s - - [%T] \"%s\" 200 3",
850 csp->ip_addr_str, http->cmd);
852 for (v = intercept_patterns; v->str; v++)
854 if (strncmp(basename, v->str, v->len) == 0)
856 char * p = ((v->interceptor)(http, csp));
860 /* Send HTML redirection result */
861 write_socket(csp->cfd, p, strlen(p));
869 write_socket(csp->cfd, C_HOME_PAGE, strlen(C_HOME_PAGE));
874 #ifdef FAST_REDIRECTS
875 /*********************************************************************
877 * Function : redirect_url
879 * Description : Checks for redirection URLs and returns an HTTP redirect
880 * to the destination URL.
883 * 1 : http = http_request request, check `basename's of blocklist
884 * 2 : csp = Current client state (buffers, headers, etc...)
886 * Returns : NULL if URL was clean, HTTP redirect otherwise.
888 *********************************************************************/
889 char *redirect_url(struct http_request *http, struct client_state *csp)
893 p = q = csp->http->path;
894 log_error(LOG_LEVEL_REDIRECTS, "checking path: %s", p);
896 /* find the last URL encoded in the request */
897 while (p = strstr(p, "http://"))
902 /* if there was any, generate and return a HTTP redirect */
903 if (q != csp->http->path)
905 log_error(LOG_LEVEL_REDIRECTS, "redirecting to: %s", q);
907 p = (char *)malloc(strlen(HTTP_REDIRECT_TEMPLATE) + strlen(q));
908 sprintf(p, HTTP_REDIRECT_TEMPLATE, q);
917 #endif /* def FAST_REDIRECTS */
919 /*********************************************************************
921 * Function : url_actions
923 * Description : Gets the actions for this URL.
926 * 1 : http = http_request request for blocked URLs
927 * 2 : csp = Current client state (buffers, headers, etc...)
931 *********************************************************************/
932 void url_actions(struct http_request *http,
933 struct client_state *csp)
935 struct file_list *fl;
936 struct url_actions *b;
938 init_current_action(csp->action);
940 if (((fl = csp->actions_list) == NULL) || ((b = fl->f) == NULL))
945 apply_url_actions(csp->action, http, b);
949 /*********************************************************************
951 * Function : apply_url_actions
953 * Description : Applies a list of URL actions.
956 * 1 : action = Destination.
957 * 2 : http = Current URL
958 * 3 : b = list of URL actions to apply
962 *********************************************************************/
963 void apply_url_actions(struct current_action_spec *action,
964 struct http_request *http,
965 struct url_actions *b)
967 struct url_spec url[1];
971 /* Should never happen */
975 *url = dsplit(http->host);
977 /* if splitting the domain fails, punt */
978 if (url->dbuf == NULL)
983 for (b = b->next; NULL != b; b = b->next)
985 if ((b->url->port == 0) || (b->url->port == http->port))
987 if ((b->url->domain[0] == '\0') || (domaincmp(b->url, url) == 0))
989 if ((b->url->path == NULL) ||
991 (regexec(b->url->preg, http->path, 0, NULL, 0) == 0)
993 (strncmp(b->url->path, http->path, b->url->pathlen) == 0)
997 merge_current_action(action, b->action);
1008 /*********************************************************************
1010 * Function : forward_url
1012 * Description : Should we forward this to another proxy?
1015 * 1 : http = http_request request for current URL
1016 * 2 : csp = Current client state (buffers, headers, etc...)
1018 * Returns : Return gw_default for no forward match,
1019 * else a gateway pointer to a specific forwarding proxy.
1021 *********************************************************************/
1022 const struct gateway *forward_url(struct http_request *http, struct client_state *csp)
1024 struct file_list *fl;
1025 struct forward_spec *b;
1026 struct url_spec url[1];
1028 if (((fl = csp->flist) == NULL) || ((b = fl->f) == NULL))
1033 *url = dsplit(http->host);
1035 /* if splitting the domain fails, punt */
1036 if (url->dbuf == NULL) return(gw_default);
1038 for (b = b->next; b ; b = b->next)
1040 if ((b->url->port == 0) || (b->url->port == http->port))
1042 if ((b->url->domain[0] == '\0') || (domaincmp(b->url, url) == 0))
1044 if ((b->url->path == NULL) ||
1046 (regexec(b->url->preg, http->path, 0, NULL, 0) == 0)
1048 (strncmp(b->url->path, http->path, b->url->pathlen) == 0)
1067 /*********************************************************************
1071 * Description : Takes a domain and returns (by value) a url_spec
1072 * structure populated with dbuf, dcnt and dvec. The
1073 * other fields in the structure that is returned are zero.
1076 * 1 : domain = a URL address
1078 * Returns : url_spec structure populated with dbuf, dcnt and dvec.
1080 *********************************************************************/
1081 struct url_spec dsplit(char *domain)
1083 struct url_spec ret[1];
1088 memset(ret, '\0', sizeof(*ret));
1090 if (domain[strlen(domain) - 1] == '.')
1092 ret->unanchored |= ANCHOR_RIGHT;
1094 if (domain[0] == '.')
1096 ret->unanchored |= ANCHOR_LEFT;
1099 ret->dbuf = strdup(domain);
1101 /* map to lower case */
1102 for (p = ret->dbuf; *p ; p++) *p = tolower(*p);
1104 /* split the domain name into components */
1105 ret->dcnt = ssplit(ret->dbuf, ".", v, SZ(v), 1, 1);
1109 memset(ret, '\0', sizeof(ret));
1113 /* save a copy of the pointers in dvec */
1114 size = ret->dcnt * sizeof(*ret->dvec);
1116 if ((ret->dvec = (char **)malloc(size)))
1118 memcpy(ret->dvec, v, size);
1127 /*********************************************************************
1129 * Function : domaincmp
1131 * Description : Domain-wise Compare fqdn's. Governed by the bitmap in
1132 * pattern->unanchored, the comparison is un-, left-,
1133 * right-anchored, or both.
1134 * The individual domain names are compared with
1138 * 1 : pattern = a domain that may contain a '*' as a wildcard.
1139 * 2 : fqdn = domain name against which the patterns are compared.
1141 * Returns : 0 => domains are equivalent, else no match.
1143 *********************************************************************/
1144 int domaincmp(struct url_spec *pattern, struct url_spec *fqdn)
1146 char **pv, **fv; /* vectors */
1147 int pn, fn; /* counters */
1148 char *p, *f; /* chars */
1154 while (fn < fqdn->dcnt && pn < pattern->dcnt)
1159 if (simplematch(p, f))
1161 if(pn || !(pattern->unanchored & ANCHOR_LEFT))
1173 return ((pn < pattern->dcnt) || ((fn < fqdn->dcnt) && !(pattern->unanchored & ANCHOR_RIGHT)));
1178 /* intercept functions */
1180 /*********************************************************************
1182 * Function : show_proxy_args
1184 * Description : This "crunch"es "http://i.j.b/show-proxy-args" and
1185 * returns a web page describing the current status of IJB.
1188 * 1 : http = ignored
1189 * 2 : csp = Current client state (buffers, headers, etc...)
1191 * Returns : A string that contains the current status of IJB.
1193 *********************************************************************/
1194 char *show_proxy_args(struct http_request *http, struct client_state *csp)
1198 #ifdef SPLIT_PROXY_ARGS
1202 const char * filename = NULL;
1203 const char * file_description = NULL;
1204 char * query_string = strrchr(http->path, '?');
1205 char which_file = '\0';
1208 if (query_string != NULL)
1210 /* first char past the last '?' (maybe '\0')*/
1211 which_file = query_string[1];
1216 if (csp->actions_list)
1218 filename = csp->actions_list->filename;
1219 file_description = "Actions List";
1225 filename = csp->flist->filename;
1226 file_description = "Forward List";
1234 filename = csp->alist->filename;
1235 file_description = "Access Control List";
1238 #endif /* def ACL_FILES */
1244 filename = csp->rlist->filename;
1245 file_description = "RE Filter List";
1248 #endif /* def PCRS */
1254 filename = csp->tlist->filename;
1255 file_description = "Trust List";
1258 #endif /* def TRUST_FILES */
1263 /* Display specified file */
1264 /* FIXME: Add HTTP headers so this isn't cached */
1267 "Server: IJ/" VERSION "\n"
1268 "Content-type: text/html\n"
1269 "Pragma: no-cache\n"
1270 "Last-Modified: Thu Jul 31, 1997 07:42:22 pm GMT\n"
1271 "Expires: Thu Jul 31, 1997 07:42:22 pm GMT\n"
1276 "<title>Internet Junkbuster Proxy Status - ");
1277 s = strsav(s, file_description);
1281 "<body bgcolor=\"#f8f8f0\" link=\"#000078\" alink=\"#ff0022\" vlink=\"#787878\">\n"
1283 "<h1>" BANNER "\n");
1284 s = strsav(s, file_description);
1287 "<p><a href=\"show-proxy-args\">Back to proxy status</a></p>\n"
1289 s = strsav(s, file_description);
1292 "Contents of file "<code>");
1293 p = html_encode(filename);
1297 "</code>":<br>\n"
1301 if ((fp = fopen(filename, "r")) == NULL)
1303 s = strsav(s, "</pre><h1>ERROR OPENING FILE!</h1><pre>");
1307 while (fgets(buf, sizeof(buf), fp))
1309 p = html_encode(buf);
1314 s = strsav(s, "<br>");
1323 "<p><a href=\"show-proxy-args\">Back to proxy status</a></p>\n"
1325 "<small><small><p>\n"
1326 "The " BANNER " Proxy - \n"
1327 "<a href=\"" HOME_PAGE_URL "\">" HOME_PAGE_URL "</a>\n"
1329 "</body></html>\n");
1332 #endif /* def SPLIT_PROXY_ARGS */
1334 s = strsav(s, csp->config->proxy_args_header);
1335 s = strsav(s, csp->config->proxy_args_invocation);
1338 #endif /* def STATISTICS */
1339 s = strsav(s, csp->config->proxy_args_gateways);
1341 #ifdef SPLIT_PROXY_ARGS
1343 "<h2>The following files are in use:</h2>\n"
1344 "<p>(Click a filename to view it)</p>\n"
1347 if (csp->actions_list)
1349 s = strsav(s, "<li>Actions List: <a href=\"show-proxy-args?permit\"><code>");
1350 s = strsav(s, csp->actions_list->filename);
1351 s = strsav(s, "</code></a></li>\n");
1356 s = strsav(s, "<li>Forward List: <a href=\"show-proxy-args?forward\"><code>");
1357 s = strsav(s, csp->flist->filename);
1358 s = strsav(s, "</code></a></li>\n");
1364 s = strsav(s, "<li>Access Control List: <a href=\"show-proxy-args?acl\"><code>");
1365 s = strsav(s, csp->alist->filename);
1366 s = strsav(s, "</code></a></li>\n");
1368 #endif /* def ACL_FILES */
1373 s = strsav(s, "<li>RE Filter List: <a href=\"show-proxy-args?re\"><code>");
1374 s = strsav(s, csp->rlist->filename);
1375 s = strsav(s, "</code></a></li>\n");
1377 #endif /* def PCRS */
1382 s = strsav(s, "<li>Trust List: <a href=\"show-proxy-args?trust\"><code>");
1383 s = strsav(s, csp->tlist->filename);
1384 s = strsav(s, "</code></a></li>\n");
1386 #endif /* def TRUST_FILES */
1388 s = strsav(s, "</ul>");
1390 #else /* ifndef SPLIT_PROXY_ARGS */
1393 s = strsav(s, csp->clist->proxy_args);
1398 s = strsav(s, csp->flist->proxy_args);
1404 s = strsav(s, csp->alist->proxy_args);
1406 #endif /* def ACL_FILES */
1411 s = strsav(s, csp->rlist->proxy_args);
1413 #endif /* def PCRS */
1418 s = strsav(s, csp->tlist->proxy_args);
1420 #endif /* def TRUST_FILES */
1422 #endif /* ndef SPLIT_PROXY_ARGS */
1424 s = strsav(s, csp->config->proxy_args_trailer);
1431 static const char C_URL_INFO_HEADER[] =
1433 "Pragma: no-cache\n"
1434 "Expires: Thu Jul 31, 1997 07:42:22 pm GMT\n"
1435 "Content-Type: text/html\n\n"
1438 "<title>Internet Junkbuster: URL Info</title>\n"
1444 "<p>Information for: <a href=\"http://%s\">http://%s</a></p>\n";
1445 static const char C_URL_INFO_FOOTER[] =
1450 static const char C_URL_INFO_FORM[] =
1452 "Pragma: no-cache\n"
1453 "Expires: Thu Jul 31, 1997 07:42:22 pm GMT\n"
1454 "Content-Type: text/html\n\n"
1457 "<title>Internet Junkbuster: URL Info</title>\n"
1463 "<form method=\"GET\" action=\"http://i.j.b/show-url-info\">\n"
1464 "<p>Please enter a URL, without the leading "http://":</p>"
1465 "<p><input type=\"text\" name=\"url\" size=\"80\">"
1466 "<input type=\"submit\" value=\"Info\"></p>\n"
1472 /*********************************************************************
1474 * Function : ijb_show_url_info
1476 * Description : Reports, as HTML, which actions apply to the URL given in the "url=" query parameter.
1479 * 1 : http = http_request request for crunched URL
1480 * 2 : csp = Current client state (buffers, headers, etc...)
1482 * Returns : HTML text for the client; a strdup'd copy of C_URL_INFO_FORM when no report is built.
1484 *********************************************************************/
1485 char *ijb_show_url_info(struct http_request *http, struct client_state *csp)
1487 char * query_string = strchr(http->path, '?');
1490 if (query_string != NULL)
1492 query_string = url_decode(query_string + 1);
1493 if (strncmpic(query_string, "url=", 4) == 0)
1495 host = strdup(query_string + 4);
1497 freez(query_string);
1505 struct file_list *fl;
1506 struct url_actions *b;
1507 struct url_spec url[1];
1508 struct current_action_spec action[1];
1510 init_current_action(action);
1512 result = (char *)malloc(sizeof(C_URL_INFO_HEADER) + 2 * strlen(host));
1513 sprintf(result, C_URL_INFO_HEADER, host, host);
1515 s = current_action_to_text(action);
1516 result = strsav(result, "<h3>Defaults:</h3>\n<p><b>{");
1517 result = strsav(result, s);
1518 result = strsav(result, " }</b></p>\n<h3>Patterns affecting the URL:</h3>\n<p>\n");
1521 s = strchr(host, '/');
1531 s = strchr(host, ':');
1538 if (((fl = csp->actions_list) == NULL) || ((b = fl->f) == NULL))
1542 result = strsav(result, C_URL_INFO_FOOTER);
1546 *url = dsplit(host);
1548 /* if splitting the domain fails, punt */
1549 if (url->dbuf == NULL)
1553 result = strsav(result, C_URL_INFO_FOOTER);
1557 for (b = b->next; NULL != b; b = b->next)
1559 if ((b->url->port == 0) || (b->url->port == port))
1561 if ((b->url->domain[0] == '\0') || (domaincmp(b->url, url) == 0))
1563 if ((b->url->path == NULL) ||
1565 (regexec(b->url->preg, path, 0, NULL, 0) == 0)
1567 (strncmp(b->url->path, path, b->url->pathlen) == 0)
1571 s = actions_to_text(b->action);
1572 result = strsav(result, "<b>{");
1573 result = strsav(result, s);
1574 result = strsav(result, " }</b><br>\n<code>");
1575 result = strsav(result, b->url->spec);
1576 result = strsav(result, "</code><br>\n<br>\n");
1579 merge_current_action(action, b->action);
1591 s = current_action_to_text(action);
1592 result = strsav(result, "</p>\n<h2>Final Results:</h2>\n<p><b>{");
1593 result = strsav(result, s);
1594 result = strsav(result, " }</b><br>\n<br>\n");
1597 free_current_action(action);
1599 result = strsav(result, C_URL_INFO_FOOTER);
1604 return strdup(C_URL_INFO_FORM);
1609 /*********************************************************************
1611 * Function : ijb_send_banner
1613 * Description : This "crunch"es "http://i.j.b/ijb-send-banner" and
 *               writes the JBGIF data directly to the client socket
 *               (csp->cfd) with write_socket().
1617 * 1 : http = http_request request for crunched URL
1618 * 2 : csp = Current client state (buffers, headers, etc...)
1620 * Returns : NULL, indicating that it has already sent the data.
1622 *********************************************************************/
1623 char *ijb_send_banner(struct http_request *http, struct client_state *csp)
/* sizeof(JBGIF)-1: every byte of the JBGIF array except its last one is sent */
1625 write_socket(csp->cfd, JBGIF, sizeof(JBGIF)-1);
1631 /*********************************************************************
1633 * Function : ij_untrusted_url
1635 * Description : This "crunch"es "http://any.thing/ij-untrusted-url" and
1636 * returns a web page describing why it was untrusted.
 *               http->path is split on '?' and '+'; when exactly four
 *               fields result, fields 1..3 are URL-decoded and used as
 *               host:port, path and referrer.  Otherwise fixed
 *               "undefined" placeholders are used instead.  The page
 *               also lists the trusted referrers from
 *               csp->config->trust_list and, when present, the
 *               documents from csp->config->trust_info.
1639 * 1 : http = http_request request for crunched URL
1640 * 2 : csp = Current client state (buffers, headers, etc...)
1642 * Returns : A string that contains why this was untrusted.
1644 *********************************************************************/
1645 char *ij_untrusted_url(struct http_request *http, struct client_state *csp)
1648 char *hostport, *path, *refer, *p, *v[9];
1650 struct url_spec **tl, *t;
/* Page template; its two %s conversions receive hostport and path */
1653 static const char format[] =
1654 "HTTP/1.0 200 OK\r\n"
1655 "Pragma: no-cache\n"
1656 "Last-Modified: Thu Jul 31, 1997 07:42:22 pm GMT\n"
1657 "Expires: Thu Jul 31, 1997 07:42:22 pm GMT\n"
1658 "Content-Type: text/html\n\n"
1661 "<title>Internet Junkbuster: Request for untrusted URL</title>\n"
1667 "The " BANNER " Proxy "
1668 "<A href=\"" HOME_PAGE_URL "\">"
1669 "(" HOME_PAGE_URL ") </A>"
1670 "intercepted the request for %s%s\n"
1671 "because the URL is not trusted.\n"
/* Exactly four fields means host:port, path and referrer are all present */
1674 if ((n = ssplit(http->path, "?+", v, SZ(v), 0, 0)) == 4)
1676 hostport = url_decode(v[1]);
1677 path = url_decode(v[2]);
1678 refer = url_decode(v[3]);
/* Any other field count: fall back to placeholder strings */
1682 hostport = strdup("undefined_host");
1683 path = strdup("/undefined_path");
1684 refer = strdup("undefined");
/* n is reused here as the size of the buffer that will hold the formatted page */
1688 n += strlen(hostport);
1691 if ((p = (char *)malloc(n)))
1693 sprintf(p, format, hostport, path);
/*
 * strsav() hands back the buffer holding the extended string, and every
 * other call site in this file stores that result.  Dropping it here
 * would leave p pointing at the old buffer, so assign it back.
 */
1696 p = strsav(p, "The referrer in this request was <strong>");
1698 p = strsav(p, "</strong><br>\n");
1704 p = strsav(p, "<h3>The following referrers are trusted</h3>\n");
/* trust_list is walked until its NULL terminator */
1706 for (tl = csp->config->trust_list; (t = *tl) ; tl++)
/* NOTE(review): unbounded sprintf() into buf -- confirm buf can hold any t->spec */
1708 sprintf(buf, "%s<br>\n", t->spec);
/* Only add the "learn more" list when at least one document is configured */
1712 if (csp->config->trust_info->next)
1718 "You can learn more about what this means "
1719 "and what you may be able to do about it by "
1720 "reading the following documents:<br>\n"
1726 for (l = csp->config->trust_info->next; l ; l = l->next)
1729 "<li> <a href=%s>%s</a><br>\n",
1734 p = strsav(p, "</ol>\n");
1737 p = strsav(p, "</body>\n" "</html>\n");
1742 #endif /* def TRUST_FILES */
1746 /*********************************************************************
1748 * Function : add_stats
1750 * Description : Statistics function of JB. Called by `show_proxy_args'.
 *               Appends a statistics section to s with strsav(): a
 *               heading, then either a "No activity so far!" note or
 *               the request counts with the percentage rejected.
1753 * 1 : s = string that holds the proxy args description page
1755 * Returns : A pointer to the descriptive status web page.
1757 *********************************************************************/
1758 char *add_stats(char *s)
1761 * Output details of the number of requests rejected and
1762 * accepted. This is switchable in the junkbuster config.
1763 * Does nothing if this option is not enabled.
1766 float perc_rej; /* Percentage of http requests rejected */
/* Work on local copies of the urls_read / urls_rejected counters */
1768 int local_urls_read = urls_read;
1769 int local_urls_rejected = urls_rejected;
1772 * Need to alter the stats not to include the fetch of this
1775 * Can't do following thread safely! doh!
1778 * urls_rejected--; * This will be incremented subsequently *
1781 s = strsav(s,"<h2>Statistics for this " BANNER ":</h2>\n");
/* Guards the division below: nothing has been read yet */
1783 if (local_urls_read == 0)
1786 s = strsav(s,"No activity so far!\n");
/* Rejected requests as a percentage of all requests read */
1792 perc_rej = (float)local_urls_rejected * 100.0F /
1793 (float)local_urls_read;
1796 "%d requests received, %d filtered "
1799 local_urls_rejected, perc_rej);
1801 s = strsav(s,out_str);
1806 #endif /* def STATISTICS */