1 /*********************************************************************
3 * File : $Source: /cvsroot/ijbswa/current/urlmatch.c,v $
5 * Purpose : Defines functions to match URLs against URL patterns.
8 * Copyright : Written by and Copyright (C) 2001-2014
9 * the Privoxy team. http://www.privoxy.org/
11 * Based on the Internet Junkbuster originally written
12 * by and Copyright (C) 1997 Anonymous Coders and
13 * Junkbusters Corporation. http://www.junkbusters.com
15 * This program is free software; you can redistribute it
16 * and/or modify it under the terms of the GNU General
17 * Public License as published by the Free Software
18 * Foundation; either version 2 of the License, or (at
19 * your option) any later version.
21 * This program is distributed in the hope that it will
22 * be useful, but WITHOUT ANY WARRANTY; without even the
23 * implied warranty of MERCHANTABILITY or FITNESS FOR A
24 * PARTICULAR PURPOSE. See the GNU General Public
25 * License for more details.
27 * The GNU General Public License should be included with
28 * this file. If not, you can view it at
29 * http://www.gnu.org/copyleft/gpl.html
30 * or write to the Free Software Foundation, Inc., 59
31 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
33 *********************************************************************/
40 #include <sys/types.h>
48 #if !defined(_WIN32) && !defined(__OS2__)
65 static jb_err compile_host_pattern(struct pattern_spec *url, const char *host_pattern);
67 /*********************************************************************
69 * Function : free_http_request
71 * Description : Frees the dynamically allocated members of a
 * http_request structure with freez(), for example
 * hostport and host_ip_addr_str.
 * NOTE(review): further members appear to be released
 * only if FEATURE_EXTENDED_HOST_PATTERNS is not defined;
 * that part is not fully visible here - confirm.
74 * 1 : http = points to a http_request structure to free
78 *********************************************************************/
79 void free_http_request(struct http_request *http)
88 freez(http->hostport);
91 freez(http->host_ip_addr_str);
92 #ifndef FEATURE_EXTENDED_HOST_PATTERNS
100 #ifndef FEATURE_EXTENDED_HOST_PATTERNS
101 /*********************************************************************
103 * Function : init_domain_components
105 * Description : Splits the domain name so we can compare it
106 * against wildcards. It used to be part of
107 * parse_http_url, but was separated because the
108 * same code is required in chat in case of
109 * intercepted requests.
 * The split is done on a lower-cased copy of
 * http->host that is stored in http->dbuffer. The
 * number of components ends up in http->dcount and
 * a heap copy of the component pointers in http->dvec.
112 * 1 : http = pointer to the http structure to hold elements.
 * http->host is read, dbuffer, dcount and dvec are set.
114 * Returns : JB_ERR_OK on success
115 * JB_ERR_PARSE on malformed command/URL, i.e. if
116 * the host has no components at all or more than
 * SZ(vec) (BUFFER_SIZE) of them.
118 *********************************************************************/
119 jb_err init_domain_components(struct http_request *http)
121 char *vec[BUFFER_SIZE];
125 http->dbuffer = strdup_or_die(http->host);
127 /* map to lower case */
128 for (p = http->dbuffer; *p ; p++)
130 *p = (char)privoxy_tolower(*p);
133 /* split the domain name into components */
134 http->dcount = ssplit(http->dbuffer, ".", vec, SZ(vec));
136 if (http->dcount <= 0)
139 * Error: More than SZ(vec) components in domain
140 * or: no components in domain
142 log_error(LOG_LEVEL_ERROR, "More than SZ(vec) components in domain or none at all.");
146 /* save a copy of the pointers in dvec */
147 size = (size_t)http->dcount * sizeof(*http->dvec);
149 http->dvec = malloc_or_die(size);
151 memcpy(http->dvec, vec, size);
155 #endif /* ndef FEATURE_EXTENDED_HOST_PATTERNS */
158 /*********************************************************************
160 * Function : url_requires_percent_encoding
162 * Description : Checks if a URL contains invalid characters
163 * according to RFC 3986 that should be percent-encoded.
164 * Does not verify whether or not the passed string
165 * actually is a valid URL.
 * Every byte of the string is looked up in a table of
 * 128 entries. A byte fails the check if its value is
 * 128 or above, or if its table entry is NUL. The
 * latter covers control characters, space, double
 * quote, angle brackets, backslash, caret, backtick,
 * curly braces, pipe and DEL.
168 * 1 : url = URL to check
170 * Returns : Judging by the function name: true if at least one
 * character should be percent-encoded, false otherwise.
 * NOTE(review): the return statements are not visible
 * here. This entry used to read "True in case of valid
 * URLs", which contradicts the function name - confirm.
172 *********************************************************************/
173 int url_requires_percent_encoding(const char *url)
175 static const char allowed_characters[128] = {
176 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
177 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
178 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
179 '\0', '\0', '\0', '!', '\0', '#', '$', '%', '&', '\'',
180 '(', ')', '*', '+', ',', '-', '.', '/', '0', '1',
181 '2', '3', '4', '5', '6', '7', '8', '9', ':', ';',
182 '\0', '=', '\0', '?', '@', 'A', 'B', 'C', 'D', 'E',
183 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
184 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y',
185 'Z', '[', '\0', ']', '\0', '_', '\0', 'a', 'b', 'c',
186 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
187 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
188 'x', 'y', 'z', '\0', '\0', '\0', '~', '\0'
193 const unsigned int i = (unsigned char)*url++;
194 if (i >= sizeof(allowed_characters) || '\0' == allowed_characters[i])
205 /*********************************************************************
207 * Function : parse_http_url
209 * Description : Parse out the host and port from the URL. Find the
210 * hostname & path, port (if ':'), and/or password (if '@')
 * Fills in http->url, http->path, http->hostport,
 * http->host and http->port. http->ssl is read to pick
 * the default port (443 with ssl, 80 without) and to
 * replace the path with "/" for ssl requests.
 * A URL starting with '*' gets the path "*" and an
 * empty hostport. Numeric IPv6 hosts are expected in
 * square brackets. An explicit port has to be a
 * decimal number in the range 1-65535 with nothing
 * trailing it, otherwise an error is logged.
213 * 1 : url = URL (or is it URI?) to break down
214 * 2 : http = pointer to the http structure to hold elements.
215 * Must be initialized with valid values (like NULLs).
216 * 3 : require_protocol = Whether or not URLs without
217 * protocol are acceptable (nonzero: protocol required).
219 * Returns : JB_ERR_OK on success
220 * JB_ERR_PARSE on malformed command/URL
221 * or if the host cannot be split into domain
 * components (see init_domain_components()).
223 *********************************************************************/
224 jb_err parse_http_url(const char *url, struct http_request *http, int require_protocol)
226 int host_available = 1; /* A proxy can dream. */
229 * Save our initial URL
231 http->url = strdup_or_die(url);
234 * Check for * URI. If found, we're done.
236 if (*http->url == '*')
238 http->path = strdup_or_die("*");
239 http->hostport = strdup_or_die("");
240 if (http->url[1] != '\0')
249 * Split URL into protocol,hostport,path.
256 buf = strdup_or_die(url);
258 /* Find the start of the URL in our scratch space */
260 if (strncmpic(url_noproto, "http://", 7) == 0)
264 else if (strncmpic(url_noproto, "https://", 8) == 0)
267 * Should only happen when called from cgi_show_url_info().
272 else if (*url_noproto == '/')
275 * Short request line without protocol and host.
276 * Most likely because the client's request
277 * was intercepted and redirected into Privoxy.
282 else if (require_protocol)
288 url_path = strchr(url_noproto, '/');
289 if (url_path != NULL)
294 * NOTE: The following line ignores the path for HTTPS URLS.
295 * This means that you get consistent behaviour if you type a
296 * https URL in and it's parsed by the function. (When the
297 * URL is actually retrieved, SSL hides the path part).
299 http->path = strdup_or_die(http->ssl ? "/" : url_path);
301 http->hostport = strdup_or_die(url_noproto);
306 * Repair broken HTTP requests that don't contain a path,
307 * or CONNECT requests
309 http->path = strdup_or_die("/");
310 http->hostport = strdup_or_die(url_noproto);
318 /* Without host, there is nothing left to do here */
323 * Split hostport into user/password (ignored), host, port.
330 buf = strdup_or_die(http->hostport);
332 /* check if url contains username and/or password */
333 host = strchr(buf, '@');
336 /* Contains username/password, skip it and the @ sign. */
341 /* No username or password. */
345 /* Move after hostname before port number */
348 /* Numeric IPv6 address delimited by brackets */
350 port = strchr(host, ']');
354 /* Missing closing bracket */
365 else if (*port != ':')
367 /* Garbage after closing bracket */
374 /* Plain non-escaped hostname */
375 port = strchr(host, ':');
378 /* check if url contains port */
384 /* Terminate hostname and point to start of port string */
386 parsed_port = strtol(port, &endptr, 10);
387 if ((parsed_port <= 0) || (parsed_port > 65535) || (*endptr != '\0'))
389 log_error(LOG_LEVEL_ERROR, "Invalid port in URL: %s.", url);
393 http->port = (int)parsed_port;
397 /* No port specified. */
398 http->port = (http->ssl ? 443 : 80);
401 http->host = strdup_or_die(host);
406 #ifdef FEATURE_EXTENDED_HOST_PATTERNS
409 /* Split domain name so we can compare it against wildcards */
410 return init_domain_components(http);
411 #endif /* def FEATURE_EXTENDED_HOST_PATTERNS */
416 /*********************************************************************
418 * Function : unknown_method
420 * Description : Checks whether a method is unknown.
 * The method is compared with strcmpic() against each
 * entry of the static known_http_methods table, which
 * lists the basic HTTP methods and several WebDAV
 * extensions.
423 * 1 : method = points to a http method
425 * Returns : TRUE if it's unknown, FALSE otherwise.
427 *********************************************************************/
428 static int unknown_method(const char *method)
430 static const char * const known_http_methods[] = {
431 /* Basic HTTP request type */
432 "GET", "HEAD", "POST", "PUT", "DELETE", "OPTIONS", "TRACE", "CONNECT",
433 /* webDAV extensions (RFC2518) */
434 "PROPFIND", "PROPPATCH", "MOVE", "COPY", "MKCOL", "LOCK", "UNLOCK",
436 * Microsoft webDAV extension for Exchange 2000. See:
437 * http://lists.w3.org/Archives/Public/w3c-dist-auth/2002JanMar/0001.html
438 * http://msdn.microsoft.com/library/en-us/wss/wss/_webdav_methods.asp
440 "BCOPY", "BMOVE", "BDELETE", "BPROPFIND", "BPROPPATCH",
442 * Another Microsoft webDAV extension for Exchange 2000. See:
443 * http://systems.cs.colorado.edu/grunwald/MobileComputing/Papers/draft-cohen-gena-p-base-00.txt
444 * http://lists.w3.org/Archives/Public/w3c-dist-auth/2002JanMar/0001.html
445 * http://msdn.microsoft.com/library/en-us/wss/wss/_webdav_methods.asp
447 "SUBSCRIBE", "UNSUBSCRIBE", "NOTIFY", "POLL",
449 * Yet another WebDAV extension, this time for
450 * Web Distributed Authoring and Versioning (RFC3253)
452 "VERSION-CONTROL", "REPORT", "CHECKOUT", "CHECKIN", "UNCHECKOUT",
453 "MKWORKSPACE", "UPDATE", "LABEL", "MERGE", "BASELINE-CONTROL", "MKACTIVITY",
455 * The PATCH method is defined by RFC5789, the format of the
456 * actual patch in the body depends on the application, but from
457 * Privoxy's point of view it doesn't matter.
463 for (i = 0; i < SZ(known_http_methods); i++)
465 if (0 == strcmpic(method, known_http_methods[i]))
476 /*********************************************************************
478 * Function : normalize_http_version
480 * Description : Take a supported HTTP version string and remove
481 * leading zeroes etc., reject unsupported versions.
483 * This is an explicit RFC 2616 (3.1) MUST and
484 * RFC 7230 mandates that intermediaries send their
485 * own HTTP-version in forwarded messages.
 * The major and minor numbers are read with sscanf().
 * Only 1.0 and 1.1 are accepted, anything else is
 * logged as an error. The normalized form is written
 * back into the buffer that was passed in (9 bytes
 * including the terminator), which is why the string
 * is asserted to be at least 8 characters long.
488 * 1 : http_version = HTTP version string, rewritten in place
490 * Returns : JB_ERR_OK on success
491 * JB_ERR_PARSE if the HTTP version is unsupported
493 *********************************************************************/
494 static jb_err normalize_http_version(char *http_version)
496 unsigned int major_version;
497 unsigned int minor_version;
499 if (2 != sscanf(http_version, "HTTP/%u.%u", &major_version, &minor_version))
501 log_error(LOG_LEVEL_ERROR, "Unsupported HTTP version: %s", http_version);
505 if (major_version != 1 || (minor_version != 0 && minor_version != 1))
507 log_error(LOG_LEVEL_ERROR, "The only supported HTTP "
508 "versions are 1.0 and 1.1. This rules out: %s", http_version);
512 assert(strlen(http_version) >= 8);
513 snprintf(http_version, 9, "HTTP/%u.%u", major_version, minor_version);
520 /*********************************************************************
522 * Function : parse_http_request
524 * Description : Parse a HTTP request line. A copy of the line is
525 * split at spaces, CR and LF into method, URL and
 * HTTP version. Unknown methods are logged as errors
 * and the version is checked with
 * normalize_http_version(). The URL is handed to
 * parse_http_url() to find the hostname & path, port
 * (if ':'), and/or password (if '@').
 * http->ssl is set for CONNECT requests, which are
 * the only requests parsed without requiring a protocol.
 * Finally cmd, gpc, ver and ocmd are filled in.
528 * 1 : req = HTTP request line to break down
529 * 2 : http = pointer to the http structure to hold elements;
 * it is zeroed before anything else happens.
531 * Returns : JB_ERR_OK on success
532 * JB_ERR_CGI_PARAMS on malformed command/URL
533 * or if the host cannot be split into domain components.
 * NOTE(review): the error returns are not visible here
 * and parse_http_url() documents JB_ERR_PARSE rather
 * than JB_ERR_CGI_PARAMS - confirm the documented code.
535 *********************************************************************/
536 jb_err parse_http_request(const char *req, struct http_request *http)
543 memset(http, '\0', sizeof(*http));
545 buf = strdup_or_die(req);
547 n = ssplit(buf, " \r\n", v, SZ(v));
555 * Fail in case of unknown methods
556 * which we might not handle correctly.
558 * XXX: There should be a config option
559 * to forward requests with unknown methods
560 * anyway. Most of them don't need special
563 if (unknown_method(v[0]))
565 log_error(LOG_LEVEL_ERROR, "Unknown HTTP method detected: %s", v[0]);
570 if (JB_ERR_OK != normalize_http_version(v[2]))
576 http->ssl = !strcmpic(v[0], "CONNECT");
578 err = parse_http_url(v[1], http, !http->ssl);
586 * Copy the details into the structure
588 http->cmd = strdup_or_die(req);
589 http->gpc = strdup_or_die(v[0]);
590 http->ver = strdup_or_die(v[2]);
591 http->ocmd = strdup_or_die(http->cmd);
600 /*********************************************************************
602 * Function : compile_pattern
604 * Description : Compiles a host, domain or TAG pattern.
 * The pattern is inserted into a format string that
 * depends on the anchoring and the result is compiled
 * with regcomp() using REG_EXTENDED, REG_NOSUB and
 * REG_ICASE. An invalid anchoring value is logged
 * with LOG_LEVEL_FATAL.
607 * 1 : pattern = The pattern to compile.
608 * 2 : anchoring = How the regex should be modified
609 * before compilation. Can be either
610 * one of NO_ANCHORING, LEFT_ANCHORED,
611 * RIGHT_ANCHORED or RIGHT_ANCHORED_HOST.
612 * 3 : url = In case of failures, the spec member is
613 * logged and the structure freed with free_pattern_spec().
614 * 4 : regex = Where the compiled regex should be stored.
 * The regex_t itself is allocated in here.
616 * Returns : JB_ERR_OK - Success
617 * JB_ERR_PARSE - Cannot parse regex
619 *********************************************************************/
620 static jb_err compile_pattern(const char *pattern, enum regex_anchoring anchoring,
621 struct pattern_spec *url, regex_t **regex)
624 const char *fmt = NULL;
630 if (pattern[0] == '\0')
644 case RIGHT_ANCHORED_HOST:
651 log_error(LOG_LEVEL_FATAL,
652 "Invalid anchoring in compile_pattern %d", anchoring);
654 rebuf_size = strlen(pattern) + strlen(fmt);
655 rebuf = malloc_or_die(rebuf_size);
656 *regex = zalloc_or_die(sizeof(**regex));
658 snprintf(rebuf, rebuf_size, fmt, pattern);
660 errcode = regcomp(*regex, rebuf, (REG_EXTENDED|REG_NOSUB|REG_ICASE));
664 size_t errlen = regerror(errcode, *regex, rebuf, rebuf_size);
665 if (errlen > (rebuf_size - (size_t)1))
667 errlen = rebuf_size - (size_t)1;
669 rebuf[errlen] = '\0';
670 log_error(LOG_LEVEL_ERROR, "error compiling %s from %s: %s",
671 pattern, url->spec, rebuf);
672 free_pattern_spec(url);
684 /*********************************************************************
686 * Function : compile_url_pattern
688 * Description : Compiles the three parts of a URL pattern:
 * the path (starting at the first '/'), compiled as
 * LEFT_ANCHORED regex into url_spec.preg; the port
 * list, kept as a string copy in url_spec.port_list
 * (NULL if there is none); and the host, which is
 * handed to compile_host_pattern(). IPv6 addresses
 * are delimited with angle brackets so that their
 * colons are not taken for the port separator.
691 * 1 : url = Target pattern_spec to be filled in.
692 * 2 : buf = The url pattern to compile. Will be messed up.
694 * Returns : JB_ERR_OK - Success
695 * JB_ERR_MEMORY - Out of memory
696 * JB_ERR_PARSE - Cannot parse regex
698 *********************************************************************/
699 static jb_err compile_url_pattern(struct pattern_spec *url, char *buf)
703 p = strchr(buf, '/');
707 * Only compile the regex if it consists of more than
708 * a single slash, otherwise it wouldn't affect the result.
713 * XXX: does it make sense to compile the slash at the beginning?
715 jb_err err = compile_pattern(p, LEFT_ANCHORED, url, &url->pattern.url_spec.preg);
717 if (JB_ERR_OK != err)
726 * IPv6 numeric hostnames can contain colons, thus we need
727 * to delimit the hostname before the real port separator.
728 * As brackets are already used in the hostname pattern,
729 * we use angle brackets ('<', '>') instead.
731 if ((buf[0] == '<') && (NULL != (p = strchr(buf + 1, '>'))))
738 /* IPv6 address without port number */
743 /* Garbage after address delimiter */
749 p = strchr(buf, ':');
755 url->pattern.url_spec.port_list = strdup_or_die(p);
759 url->pattern.url_spec.port_list = NULL;
764 return compile_host_pattern(url, buf);
772 #ifdef FEATURE_EXTENDED_HOST_PATTERNS
773 /*********************************************************************
775 * Function : compile_host_pattern
777 * Description : Parses and compiles a host pattern.
 * This variant is built if
 * FEATURE_EXTENDED_HOST_PATTERNS is defined. It
 * compiles the pattern as RIGHT_ANCHORED_HOST regex
 * into url_spec.host_regex via compile_pattern().
780 * 1 : url = Target pattern_spec to be filled in.
781 * 2 : host_pattern = Host pattern to compile.
783 * Returns : JB_ERR_OK - Success
784 * JB_ERR_MEMORY - Out of memory
785 * JB_ERR_PARSE - Cannot parse regex
787 *********************************************************************/
788 static jb_err compile_host_pattern(struct pattern_spec *url, const char *host_pattern)
790 return compile_pattern(host_pattern, RIGHT_ANCHORED_HOST, url, &url->pattern.url_spec.host_regex);
795 /*********************************************************************
797 * Function : compile_host_pattern
799 * Description : Parses and "compiles" an old-school host pattern.
 * A trailing '.' sets ANCHOR_RIGHT and a leading '.'
 * sets ANCHOR_LEFT in url_spec.unanchored. A lower-cased
 * copy of the pattern is kept in url_spec.dbuffer and
 * split at '.'. The number of components is stored in
 * url_spec.dcount and, unless it is zero, the
 * component pointers are copied to url_spec.dvec.
 * NOTE(review): the trailing-dot test indexes
 * host_pattern[strlen(host_pattern) - 1], so a
 * non-empty host_pattern is assumed - confirm that
 * the caller guarantees this.
802 * 1 : url = Target pattern_spec to be filled in.
803 * 2 : host_pattern = Host pattern to parse.
805 * Returns : JB_ERR_OK - Success
806 * JB_ERR_PARSE - Cannot parse pattern. In this case url
 * has been released with free_pattern_spec().
808 *********************************************************************/
809 static jb_err compile_host_pattern(struct pattern_spec *url, const char *host_pattern)
818 if (host_pattern[strlen(host_pattern) - 1] == '.')
820 url->pattern.url_spec.unanchored |= ANCHOR_RIGHT;
822 if (host_pattern[0] == '.')
824 url->pattern.url_spec.unanchored |= ANCHOR_LEFT;
828 * Split domain into components
830 url->pattern.url_spec.dbuffer = strdup_or_die(host_pattern);
835 for (p = url->pattern.url_spec.dbuffer; *p ; p++)
837 *p = (char)privoxy_tolower(*p);
841 * Split the domain name into components
843 url->pattern.url_spec.dcount = ssplit(url->pattern.url_spec.dbuffer, ".", v, SZ(v));
845 if (url->pattern.url_spec.dcount < 0)
847 free_pattern_spec(url);
850 else if (url->pattern.url_spec.dcount != 0)
853 * Save a copy of the pointers in dvec
855 size = (size_t)url->pattern.url_spec.dcount * sizeof(*url->pattern.url_spec.dvec);
857 url->pattern.url_spec.dvec = malloc_or_die(size);
859 memcpy(url->pattern.url_spec.dvec, v, size);
862 * else dcount == 0 in which case we needn't do anything,
863 * since dvec will never be accessed and the pattern will
870 /*********************************************************************
872 * Function : simplematch
874 * Description : String matching, with a (greedy) '*' wildcard that
875 * stands for zero or more arbitrary characters and
876 * character classes in [], which take both enumerations
 * and ranges (written with '-').
 * A character class is expanded into charmap, a bitmap
 * of 32 bytes with one bit per possible character
 * value. After a '*' the position behind it is kept
 * as fallback so that matching can be restarted there.
880 * 1 : pattern = pattern for matching
881 * 2 : text = text to be matched
883 * Returns : 0 if match, else nonzero
885 *********************************************************************/
886 static int simplematch(const char *pattern, const char *text)
888 const unsigned char *pat = (const unsigned char *)pattern;
889 const unsigned char *txt = (const unsigned char *)text;
890 const unsigned char *fallback = pat;
893 unsigned char lastchar = 'a';
895 unsigned char charmap[32];
900 /* EOF pattern but !EOF text? */
913 /* '*' in the pattern? */
917 /* The pattern ends afterwards? Speed up the return. */
923 /* Else, set wildcard mode and remember position after '*' */
928 /* Character range specification? */
931 memset(charmap, '\0', sizeof(charmap));
933 while (*++pat != ']')
939 else if (*pat == '-')
941 if ((*++pat == ']') || *pat == '\0')
945 for (i = lastchar; i <= *pat; i++)
947 charmap[i / 8] |= (unsigned char)(1 << (i % 8));
952 charmap[*pat / 8] |= (unsigned char)(1 << (*pat % 8));
956 } /* -END- if Character range specification */
960 * Char match, or char range match?
964 || ((*pat == ']') && (charmap[*txt / 8] & (1 << (*txt % 8)))))
974 * No match && no wildcard: No luck
978 else if (pat != fallback)
981 * Increment text pointer if in char range matching
988 * Wildcard mode && nonmatch beyond fallback: Rewind pattern
992 * Restart matching from current text pointer
999 /* Cut off extra '*'s */
1000 if (*pat == '*') pat++;
1002 /* If this is the pattern's end, fine! */
1008 /*********************************************************************
1010 * Function : simple_domaincmp
1012 * Description : Domain-wise Compare fqdn's. The comparison is
1013 * both left- and right-anchored. The individual
1014 * domain names are compared with simplematch().
1015 * This is only used by domain_match.
 * Element n of pv is matched against element n of fv
 * for the first len elements.
1018 * 1 : pv = array of patterns to compare
1019 * 2 : fv = array of domain components to compare
1020 * 3 : len = length of the arrays (both arrays are the
1021 * same length - if they weren't, it couldn't
1022 * possibly be a match).
1024 * Returns : 0 => domains are equivalent, else no match.
1026 *********************************************************************/
1027 static int simple_domaincmp(char **pv, char **fv, int len)
1031 for (n = 0; n < len; n++)
1033 if (simplematch(pv[n], fv[n]))
1044 /*********************************************************************
1046 * Function : domain_match
1048 * Description : Domain-wise Compare fqdn's. Governed by the bitmap in
1049 * p->pattern.url_spec.unanchored, the comparison is un-,
1050 * left-, right-anchored, or both.
1051 * The individual domain names are compared with
 * simplematch() by way of simple_domaincmp().
 * ANCHOR_LEFT only: the pattern is compared against
 * the last components of the fqdn.
 * No bit set: fully anchored, the lengths are checked.
 * ANCHOR_RIGHT only: the pattern is compared against
 * the first components, extra ones are ignored.
 * Both bits set: every offset into the fqdn is tried.
1055 * 1 : p = pattern_spec whose url_spec.dvec and url_spec.dcount
 * describe the pattern components, which may contain
 * a '*' as a wildcard.
1056 * 2 : fqdn = domain name against which the patterns are compared.
1058 * Returns : 0 => domains are equivalent, else no match.
1060 *********************************************************************/
1061 static int domain_match(const struct pattern_spec *p, const struct http_request *fqdn)
1063 char **pv, **fv; /* vectors */
1065 int unanchored = p->pattern.url_spec.unanchored & (ANCHOR_RIGHT | ANCHOR_LEFT);
1067 plen = p->pattern.url_spec.dcount;
1068 flen = fqdn->dcount;
1072 /* fqdn is too short to match this pattern */
1076 pv = p->pattern.url_spec.dvec;
1079 if (unanchored == ANCHOR_LEFT)
1084 * Convert this into a fully anchored pattern with
1085 * the fqdn and pattern the same length
1087 fv += (flen - plen); /* flen - plen >= 0 due to check above */
1088 return simple_domaincmp(pv, fv, plen);
1090 else if (unanchored == 0)
1092 /* Fully anchored, check length */
1097 return simple_domaincmp(pv, fv, plen);
1099 else if (unanchored == ANCHOR_RIGHT)
1101 /* Left anchored, ignore all extra in fqdn */
1102 return simple_domaincmp(pv, fv, plen);
1108 int maxn = flen - plen;
1109 for (n = 0; n <= maxn; n++)
1111 if (!simple_domaincmp(pv, fv, plen))
1116 * Doesn't match from start of fqdn
1117 * Try skipping first part of fqdn
1125 #endif /* def FEATURE_EXTENDED_HOST_PATTERNS */
1128 /*********************************************************************
1130 * Function : create_pattern_spec
1132 * Description : Creates a "pattern_spec" structure from a string.
1133 * When finished, free with free_pattern_spec().
 * If the string starts with one of the tag prefixes
 * (TAG:, NO-REQUEST-TAG:, NO-RESPONSE-TAG: and, with
 * FEATURE_CLIENT_TAGS, CLIENT-TAG:), the matching flag
 * is set and the rest is compiled as an unanchored
 * regex into pattern.tag_regex. Otherwise the pattern
 * is flagged PATTERN_SPEC_URL_PATTERN and handed to
 * compile_url_pattern().
1136 * 1 : pattern = Target pattern_spec to be filled in.
1137 * Will be zeroed before use.
1138 * 2 : buf = Source pattern, null terminated. NOTE: The
1139 * contents of this buffer are destroyed by this
1140 * function. A copy of the buffer is stored in
1141 * pattern->spec before parsing starts, so that
1142 * the original specification stays available.
1145 * Returns : JB_ERR_OK - Success
1146 * JB_ERR_PARSE - Cannot parse regex (Detailed message
1147 * written to system log)
1149 *********************************************************************/
1150 jb_err create_pattern_spec(struct pattern_spec *pattern, char *buf)
1154 /** The tag pattern prefix to match */
1157 /** The length of the prefix to match */
1158 const size_t prefix_length;
1160 /** The pattern flag */
1161 const unsigned flag;
1163 { "TAG:", 4, PATTERN_SPEC_TAG_PATTERN},
1164 #ifdef FEATURE_CLIENT_TAGS
1165 { "CLIENT-TAG:", 11, PATTERN_SPEC_CLIENT_TAG_PATTERN},
1167 { "NO-REQUEST-TAG:", 15, PATTERN_SPEC_NO_REQUEST_TAG_PATTERN},
1168 { "NO-RESPONSE-TAG:", 16, PATTERN_SPEC_NO_RESPONSE_TAG_PATTERN}
1175 memset(pattern, '\0', sizeof(*pattern));
1177 /* Remember the original specification for the CGI pages. */
1178 pattern->spec = strdup_or_die(buf);
1180 /* Check if it's a tag pattern */
1181 for (i = 0; i < SZ(tag_pattern); i++)
1183 if (0 == strncmpic(pattern->spec, tag_pattern[i].prefix, tag_pattern[i].prefix_length))
1185 /* The regex starts after the prefix */
1186 const char *tag_regex = buf + tag_pattern[i].prefix_length;
1188 pattern->flags |= tag_pattern[i].flag;
1190 return compile_pattern(tag_regex, NO_ANCHORING, pattern,
1191 &pattern->pattern.tag_regex);
1195 /* If it isn't a tag pattern it must be an URL pattern. */
1196 pattern->flags |= PATTERN_SPEC_URL_PATTERN;
1198 return compile_url_pattern(pattern, buf);
1203 /*********************************************************************
1205 * Function : free_pattern_spec
1207 * Description : Called from the "unloaders". Freez the pattern
1208 * structure elements.
 * Releases spec, the host part (host_regex, or
 * dbuffer and dvec, depending on
 * FEATURE_EXTENDED_HOST_PATTERNS), port_list, preg and
 * tag_regex. Compiled regexes are passed to regfree()
 * before their memory is freed.
1211 * 1 : pattern = pointer to a pattern_spec structure.
 * NULL is accepted and ignored.
1215 *********************************************************************/
1216 void free_pattern_spec(struct pattern_spec *pattern)
1218 if (pattern == NULL) return;
1220 freez(pattern->spec);
1221 #ifdef FEATURE_EXTENDED_HOST_PATTERNS
1222 if (pattern->pattern.url_spec.host_regex)
1224 regfree(pattern->pattern.url_spec.host_regex);
1225 freez(pattern->pattern.url_spec.host_regex);
1228 freez(pattern->pattern.url_spec.dbuffer);
1229 freez(pattern->pattern.url_spec.dvec);
1230 pattern->pattern.url_spec.dcount = 0;
1231 #endif /* ndef FEATURE_EXTENDED_HOST_PATTERNS */
1232 freez(pattern->pattern.url_spec.port_list);
1233 if (pattern->pattern.url_spec.preg)
1235 regfree(pattern->pattern.url_spec.preg);
1236 freez(pattern->pattern.url_spec.preg);
1238 if (pattern->pattern.tag_regex)
1240 regfree(pattern->pattern.tag_regex);
1241 freez(pattern->pattern.tag_regex);
1246 /*********************************************************************
1248 * Function : port_matches
1250 * Description : Compares a port against a port list.
 * A NULL port list matches every port, otherwise
 * the decision is left to match_portlist().
1253 * 1 : port = The port to check.
1254 * 2 : port_list = The list of ports to compare with, or NULL.
1256 * Returns : TRUE for yes, FALSE otherwise.
1258 *********************************************************************/
1259 static int port_matches(const int port, const char *port_list)
1261 return ((NULL == port_list) || match_portlist(port_list, port));
1265 /*********************************************************************
1267 * Function : host_matches
1269 * Description : Compares a host against a host pattern.
 * A pattern without host part (NULL host_regex or
 * NULL dbuffer, depending on the build) matches every
 * host. Otherwise the host is checked with regexec()
 * or domain_match() respectively.
1272 * 1 : http = The request whose host is matched.
 * http->host must not be NULL.
1273 * 2 : pattern = The URL pattern
1275 * Returns : TRUE for yes, FALSE otherwise.
1277 *********************************************************************/
1278 static int host_matches(const struct http_request *http,
1279 const struct pattern_spec *pattern)
1281 assert(http->host != NULL);
1282 #ifdef FEATURE_EXTENDED_HOST_PATTERNS
1283 return ((NULL == pattern->pattern.url_spec.host_regex)
1284 || (0 == regexec(pattern->pattern.url_spec.host_regex, http->host, 0, NULL, 0)));
1286 return ((NULL == pattern->pattern.url_spec.dbuffer) || (0 == domain_match(pattern, http)));
1291 /*********************************************************************
1293 * Function : path_matches
1295 * Description : Compares a path against a path pattern.
 * A pattern without compiled path regex (NULL
 * url_spec.preg) matches every path, otherwise the
 * path is checked with regexec().
1298 * 1 : path = The path to match
1299 * 2 : pattern = The URL pattern
1301 * Returns : TRUE for yes, FALSE otherwise.
1303 *********************************************************************/
1304 static int path_matches(const char *path, const struct pattern_spec *pattern)
1306 return ((NULL == pattern->pattern.url_spec.preg)
1307 || (0 == regexec(pattern->pattern.url_spec.preg, path, 0, NULL, 0)));
1311 /*********************************************************************
1313 * Function : url_match
1315 * Description : Compare a URL against a URL pattern.
 * Patterns that are not flagged
 * PATTERN_SPEC_URL_PATTERN are not matched against
 * URLs. For URL patterns the port, the host and the
 * path are checked in this order and all of them
 * have to match.
1318 * 1 : pattern = a URL pattern
1319 * 2 : http = parsed request containing the URL to match
1321 * Returns : Nonzero if the URL matches the pattern, else 0.
1323 *********************************************************************/
1324 int url_match(const struct pattern_spec *pattern,
1325 const struct http_request *http)
1327 if (!(pattern->flags & PATTERN_SPEC_URL_PATTERN))
1329 /* It's not an URL pattern and thus shouldn't be matched against URLs */
1333 return (port_matches(http->port, pattern->pattern.url_spec.port_list)
1334 && host_matches(http, pattern) && path_matches(http->path, pattern));
1339 /*********************************************************************
1341 * Function : match_portlist
1343 * Description : Check if a given number is covered by a comma
1344 * separated list of numbers and ranges (a,b-c,d,..)
 * The list is taken apart on a private copy, which
 * is freed again before returning. Numbers are
 * converted with atoi(). A range whose upper bound
 * evaluates to 0 (for example because it was
 * omitted) extends up to 65535.
1347 * 1 : portlist = String with list
1348 * 2 : port = port to check
1350 * Returns : 0 => no match
 * NOTE(review): the value returned for a match is
 * not visible here - presumably nonzero, confirm.
1353 *********************************************************************/
1354 int match_portlist(const char *portlist, int port)
1356 char *min, *max, *next, *portlist_copy;
1358 min = portlist_copy = strdup_or_die(portlist);
1361 * Zero-terminate first item and remember offset for next
1363 if (NULL != (next = strchr(portlist_copy, (int) ',')))
1369 * Loop through all items, checking for match
1373 if (NULL == (max = strchr(min, (int) '-')))
1376 * No dash, check for equality
1378 if (port == atoi(min))
1380 freez(portlist_copy);
1387 * This is a range, so check if between min and max,
1388 * or, if max was omitted, between min and 65K
1391 if (port >= atoi(min) && port <= (atoi(max) ? atoi(max) : 65535))
1393 freez(portlist_copy);
1405 * Zero-terminate next item and remember offset for n+1
1407 if ((NULL != next) && (NULL != (next = strchr(next, (int) ','))))
1413 freez(portlist_copy);
1419 /*********************************************************************
1421 * Function : parse_forwarder_address
1423 * Description : Parse out the username, password, host and port from
1424 * a forwarder address.
1427 * 1 : address = The forwarder address to parse.
1428 * 2 : hostname = Used to return the hostname. NULL on error.
1429 * 3 : port = Used to return the port. Untouched if no port
1431 * 4 : username = Used to return the username if any.
1432 * 5 : password = Used to return the password if any.
1434 * Returns : JB_ERR_OK on success
1435 * JB_ERR_MEMORY on out of memory
1436 * JB_ERR_PARSE on malformed address.
1438 *********************************************************************/
1439 jb_err parse_forwarder_address(char *address, char **hostname, int *port,
1440 char **username, char **password)
1445 tmp = *hostname = strdup_or_die(address);
1447 /* Parse username and password */
1448 if (username && password && (NULL != (p = strchr(*hostname, '@'))))
1451 *username = strdup_or_die(*hostname);
1452 *hostname = strdup_or_die(p);
1454 if (NULL != (p = strchr(*username, ':')))
1457 *password = strdup_or_die(p);
1462 /* Parse hostname and port */
1464 if ((*p == '[') && (NULL == strchr(p, ']')))
1466 /* XXX: Should do some more validity checks here. */
1467 return JB_ERR_PARSE;
1470 if ((**hostname == '[') && (NULL != (p = strchr(*hostname, ']'))))
1473 memmove(*hostname, (*hostname + 1), (size_t)(p - *hostname));
1476 *port = (int)strtol(++p, NULL, 0);
1479 else if (NULL != (p = strchr(*hostname, ':')))
1482 *port = (int)strtol(p, NULL, 0);