1 /*********************************************************************
3 * File : $Source: /cvsroot/ijbswa/current/urlmatch.c,v $
5 * Purpose : Defines functions to match URLs against URL
8 * Copyright : Written by and Copyright (C) 2001-2014
9 * the Privoxy team. https://www.privoxy.org/
11 * Based on the Internet Junkbuster originally written
12 * by and Copyright (C) 1997 Anonymous Coders and
13 * Junkbusters Corporation. http://www.junkbusters.com
15 * This program is free software; you can redistribute it
16 * and/or modify it under the terms of the GNU General
17 * Public License as published by the Free Software
18 * Foundation; either version 2 of the License, or (at
19 * your option) any later version.
21 * This program is distributed in the hope that it will
22 * be useful, but WITHOUT ANY WARRANTY; without even the
23 * implied warranty of MERCHANTABILITY or FITNESS FOR A
24 * PARTICULAR PURPOSE. See the GNU General Public
25 * License for more details.
27 * The GNU General Public License should be included with
28 * this file. If not, you can view it at
29 * http://www.gnu.org/copyleft/gpl.html
30 * or write to the Free Software Foundation, Inc., 59
31 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
33 *********************************************************************/
40 #include <sys/types.h>
48 #if !defined(_WIN32) && !defined(__OS2__)
65 static jb_err compile_host_pattern(struct pattern_spec *url, const char *host_pattern);
67 /*********************************************************************
69 * Function : free_http_request
71 * Description : Frees the contents of an http_request structure
74 * 1 : http = points to a http_request structure to free
78 *********************************************************************/
79 void free_http_request(struct http_request *http)
88 freez(http->hostport);
91 freez(http->host_ip_addr_str);
92 #ifndef FEATURE_EXTENDED_HOST_PATTERNS
100 #ifndef FEATURE_EXTENDED_HOST_PATTERNS
101 /*********************************************************************
103 * Function : init_domain_components
105 * Description : Splits the domain name so we can compare it
106 * against wildcards. It used to be part of
107 * parse_http_url, but was separated because the
108 * same code is required in chat in case of
109 * intercepted requests.
112 * 1 : http = pointer to the http structure to hold elements.
114 * Returns : JB_ERR_OK on success
115 * JB_ERR_PARSE if the host name has no components
116 * or more components than fit into vec[BUFFER_SIZE].
118 *********************************************************************/
119 jb_err init_domain_components(struct http_request *http)
121 char *vec[BUFFER_SIZE];
125 http->dbuffer = strdup_or_die(http->host);
127 /* map to lower case */
128 for (p = http->dbuffer; *p ; p++)
130 *p = (char)privoxy_tolower(*p);
133 /* split the domain name into components */
134 http->dcount = ssplit(http->dbuffer, ".", vec, SZ(vec));
136 if (http->dcount <= 0)
139 * Error: More than SZ(vec) components in domain
140 * or: no components in domain
142 log_error(LOG_LEVEL_ERROR, "More than SZ(vec) components in domain or none at all.");
146 /* save a copy of the pointers in dvec */
147 size = (size_t)http->dcount * sizeof(*http->dvec);
149 http->dvec = malloc_or_die(size);
151 memcpy(http->dvec, vec, size);
155 #endif /* ndef FEATURE_EXTENDED_HOST_PATTERNS */
158 /*********************************************************************
160 * Function : url_requires_percent_encoding
162 * Description : Checks if an URL contains invalid characters
163 * according to RFC 3986 that should be percent-encoded.
164 * Does not verify whether or not the passed string
165 * actually is a valid URL.
168 * 1 : url = URL to check
170 * Returns : True if the URL contains characters that need to be percent-encoded, false otherwise
172 *********************************************************************/
173 int url_requires_percent_encoding(const char *url)
175 static const char allowed_characters[128] = {
176 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
177 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
178 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
179 '\0', '\0', '\0', '!', '\0', '#', '$', '%', '&', '\'',
180 '(', ')', '*', '+', ',', '-', '.', '/', '0', '1',
181 '2', '3', '4', '5', '6', '7', '8', '9', ':', ';',
182 '\0', '=', '\0', '?', '@', 'A', 'B', 'C', 'D', 'E',
183 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
184 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y',
185 'Z', '[', '\0', ']', '\0', '_', '\0', 'a', 'b', 'c',
186 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
187 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
188 'x', 'y', 'z', '\0', '\0', '\0', '~', '\0'
193 const unsigned int i = (unsigned char)*url++;
194 if (i >= sizeof(allowed_characters) || '\0' == allowed_characters[i])
205 /*********************************************************************
207 * Function : parse_http_url
209 * Description : Parse out the host and port from the URL. Find the
210 * hostname & path, port (if ':'), and/or password (if '@')
213 * 1 : url = URL (or is it URI?) to break down
214 * 2 : http = pointer to the http structure to hold elements.
215 * Must be initialized with valid values (like NULLs).
216 * 3 : require_protocol = Whether or not URLs without
217 * protocol are acceptable.
219 * Returns : JB_ERR_OK on success
220 * JB_ERR_PARSE on malformed command/URL
221 * or >100 domains deep.
223 *********************************************************************/
224 jb_err parse_http_url(const char *url, struct http_request *http, int require_protocol)
226 int host_available = 1; /* A proxy can dream. */
229 * Save our initial URL
231 http->url = strdup_or_die(url);
234 * Check for * URI. If found, we're done.
236 if (*http->url == '*')
238 http->path = strdup_or_die("*");
239 http->hostport = strdup_or_die("");
240 if (http->url[1] != '\0')
249 * Split URL into protocol,hostport,path.
256 buf = strdup_or_die(url);
258 /* Find the start of the URL in our scratch space */
260 if (strncmpic(url_noproto, "http://", 7) == 0)
264 else if (strncmpic(url_noproto, "https://", 8) == 0)
267 * Should only happen when called from cgi_show_url_info().
272 else if (*url_noproto == '/')
275 * Short request line without protocol and host.
276 * Most likely because the client's request
277 * was intercepted and redirected into Privoxy.
282 else if (require_protocol)
288 url_path = strchr(url_noproto, '/');
289 if (url_path != NULL)
294 * If FEATURE_HTTPS_INSPECTION isn't available, ignore the
295 * path for https URLs so that we get consistent behaviour
296 * if a https URL is parsed. When the URL is actually
297 * retrieved, https hides the path part.
299 http->path = strdup_or_die(
300 #ifndef FEATURE_HTTPS_INSPECTION
306 http->hostport = strdup_or_die(url_noproto);
311 * Repair broken HTTP requests that don't contain a path,
312 * or CONNECT requests
314 http->path = strdup_or_die("/");
315 http->hostport = strdup_or_die(url_noproto);
323 /* Without host, there is nothing left to do here */
328 * Split hostport into user/password (ignored), host, port.
335 buf = strdup_or_die(http->hostport);
337 /* check if url contains username and/or password */
338 host = strchr(buf, '@');
341 /* Contains username/password, skip it and the @ sign. */
346 /* No username or password. */
350 /* Move after hostname before port number */
353 /* Numeric IPv6 address delimited by brackets */
355 port = strchr(host, ']');
359 /* Missing closing bracket */
370 else if (*port != ':')
372 /* Garbage after closing bracket */
379 /* Plain non-escaped hostname */
380 port = strchr(host, ':');
383 /* check if url contains port */
389 /* Terminate hostname and point to start of port string */
391 parsed_port = strtol(port, &endptr, 10);
392 if ((parsed_port <= 0) || (parsed_port > 65535) || (*endptr != '\0'))
394 log_error(LOG_LEVEL_ERROR, "Invalid port in URL: %s.", url);
398 http->port = (int)parsed_port;
402 /* No port specified. */
403 http->port = (http->ssl ? 443 : 80);
406 http->host = strdup_or_die(host);
411 #ifdef FEATURE_EXTENDED_HOST_PATTERNS
414 /* Split domain name so we can compare it against wildcards */
415 return init_domain_components(http);
416 #endif /* def FEATURE_EXTENDED_HOST_PATTERNS */
421 /*********************************************************************
423 * Function : unknown_method
425 * Description : Checks whether a method is unknown.
428 * 1 : method = points to a http method
430 * Returns : TRUE if it's unknown, FALSE otherwise.
432 *********************************************************************/
433 static int unknown_method(const char *method)
435 static const char * const known_http_methods[] = {
436 /* Basic HTTP request type */
437 "GET", "HEAD", "POST", "PUT", "DELETE", "OPTIONS", "TRACE", "CONNECT",
438 /* webDAV extensions (RFC2518) */
439 "PROPFIND", "PROPPATCH", "MOVE", "COPY", "MKCOL", "LOCK", "UNLOCK",
441 * Microsoft webDAV extension for Exchange 2000. See:
442 * http://lists.w3.org/Archives/Public/w3c-dist-auth/2002JanMar/0001.html
443 * http://msdn.microsoft.com/library/en-us/wss/wss/_webdav_methods.asp
445 "BCOPY", "BMOVE", "BDELETE", "BPROPFIND", "BPROPPATCH",
447 * Another Microsoft webDAV extension for Exchange 2000. See:
448 * http://systems.cs.colorado.edu/grunwald/MobileComputing/Papers/draft-cohen-gena-p-base-00.txt
449 * http://lists.w3.org/Archives/Public/w3c-dist-auth/2002JanMar/0001.html
450 * http://msdn.microsoft.com/library/en-us/wss/wss/_webdav_methods.asp
452 "SUBSCRIBE", "UNSUBSCRIBE", "NOTIFY", "POLL",
454 * Yet another WebDAV extension, this time for
455 * Web Distributed Authoring and Versioning (RFC3253)
457 "VERSION-CONTROL", "REPORT", "CHECKOUT", "CHECKIN", "UNCHECKOUT",
458 "MKWORKSPACE", "UPDATE", "LABEL", "MERGE", "BASELINE-CONTROL", "MKACTIVITY",
460 * The PATCH method is defined by RFC5789, the format of the
461 * actual patch in the body depends on the application, but from
462 * Privoxy's point of view it doesn't matter.
468 for (i = 0; i < SZ(known_http_methods); i++)
470 if (0 == strcmpic(method, known_http_methods[i]))
481 /*********************************************************************
483 * Function : normalize_http_version
485 * Description : Take a supported HTTP version string and remove
486 * leading zeroes etc., reject unsupported versions.
488 * This is an explicit RFC 2616 (3.1) MUST and
489 * RFC 7230 mandates that intermediaries send their
490 * own HTTP-version in forwarded messages.
493 * 1 : http_version = HTTP version string
495 * Returns : JB_ERR_OK on success
496 * JB_ERR_PARSE if the HTTP version is unsupported
498 *********************************************************************/
499 static jb_err normalize_http_version(char *http_version)
501 unsigned int major_version;
502 unsigned int minor_version;
504 if (2 != sscanf(http_version, "HTTP/%u.%u", &major_version, &minor_version))
506 log_error(LOG_LEVEL_ERROR, "Unsupported HTTP version: %s", http_version);
510 if (major_version != 1 || (minor_version != 0 && minor_version != 1))
512 log_error(LOG_LEVEL_ERROR, "The only supported HTTP "
513 "versions are 1.0 and 1.1. This rules out: %s", http_version);
517 assert(strlen(http_version) >= 8);
518 snprintf(http_version, 9, "HTTP/%u.%u", major_version, minor_version);
525 /*********************************************************************
527 * Function : parse_http_request
529 * Description : Splits an HTTP request line into method, URL and version,
530 * rejects unknown methods and unsupported versions, and parses the URL with parse_http_url().
533 * 1 : req = HTTP request line to break down
534 * 2 : http = pointer to the http structure to hold elements
536 * Returns : JB_ERR_OK on success
537 * JB_ERR_CGI_PARAMS on malformed command/URL
538 * or >100 domains deep.
540 *********************************************************************/
541 jb_err parse_http_request(const char *req, struct http_request *http)
548 memset(http, '\0', sizeof(*http));
550 buf = strdup_or_die(req);
552 n = ssplit(buf, " \r\n", v, SZ(v));
560 * Fail in case of unknown methods
561 * which we might not handle correctly.
563 * XXX: There should be a config option
564 * to forward requests with unknown methods
565 * anyway. Most of them don't need special
568 if (unknown_method(v[0]))
570 log_error(LOG_LEVEL_ERROR, "Unknown HTTP method detected: %s", v[0]);
575 if (JB_ERR_OK != normalize_http_version(v[2]))
581 http->ssl = !strcmpic(v[0], "CONNECT");
583 err = parse_http_url(v[1], http, !http->ssl);
591 * Copy the details into the structure
593 http->cmd = strdup_or_die(req);
594 http->gpc = strdup_or_die(v[0]);
595 http->version = strdup_or_die(v[2]);
596 http->ocmd = strdup_or_die(http->cmd);
605 /*********************************************************************
607 * Function : compile_pattern
609 * Description : Compiles a host, domain or TAG pattern.
612 * 1 : pattern = The pattern to compile.
613 * 2 : anchoring = How the regex should be modified
614 * before compilation. Can be either
615 * one of NO_ANCHORING, LEFT_ANCHORED,
616 * RIGHT_ANCHORED or RIGHT_ANCHORED_HOST.
617 * 3 : url = In case of failures, the spec member is
618 * logged and the structure freed.
619 * 4 : regex = Where the compiled regex should be stored.
621 * Returns : JB_ERR_OK - Success
622 * JB_ERR_PARSE - Cannot parse regex
624 *********************************************************************/
625 static jb_err compile_pattern(const char *pattern, enum regex_anchoring anchoring,
626 struct pattern_spec *url, regex_t **regex)
629 const char *fmt = NULL;
635 if (pattern[0] == '\0')
649 case RIGHT_ANCHORED_HOST:
656 log_error(LOG_LEVEL_FATAL,
657 "Invalid anchoring in compile_pattern %d", anchoring);
659 rebuf_size = strlen(pattern) + strlen(fmt);
660 rebuf = malloc_or_die(rebuf_size);
661 *regex = zalloc_or_die(sizeof(**regex));
663 snprintf(rebuf, rebuf_size, fmt, pattern);
665 errcode = regcomp(*regex, rebuf, (REG_EXTENDED|REG_NOSUB|REG_ICASE));
669 size_t errlen = regerror(errcode, *regex, rebuf, rebuf_size);
670 if (errlen > (rebuf_size - (size_t)1))
672 errlen = rebuf_size - (size_t)1;
674 rebuf[errlen] = '\0';
675 log_error(LOG_LEVEL_ERROR, "error compiling %s from %s: %s",
676 pattern, url->spec, rebuf);
677 free_pattern_spec(url);
689 /*********************************************************************
691 * Function : compile_url_pattern
693 * Description : Compiles the three parts of an URL pattern.
696 * 1 : url = Target pattern_spec to be filled in.
697 * 2 : buf = The url pattern to compile. Will be messed up.
699 * Returns : JB_ERR_OK - Success
700 * JB_ERR_MEMORY - Out of memory
701 * JB_ERR_PARSE - Cannot parse regex
703 *********************************************************************/
704 static jb_err compile_url_pattern(struct pattern_spec *url, char *buf)
708 p = strchr(buf, '/');
712 * Only compile the regex if it consists of more than
713 * a single slash, otherwise it wouldn't affect the result.
718 * XXX: does it make sense to compile the slash at the beginning?
720 jb_err err = compile_pattern(p, LEFT_ANCHORED, url, &url->pattern.url_spec.preg);
722 if (JB_ERR_OK != err)
731 * IPv6 numeric hostnames can contain colons, thus we need
732 * to delimit the hostname before the real port separator.
733 * As brackets are already used in the hostname pattern,
734 * we use angle brackets ('<', '>') instead.
736 if ((buf[0] == '<') && (NULL != (p = strchr(buf + 1, '>'))))
743 /* IPv6 address without port number */
748 /* Garbage after address delimiter */
754 p = strchr(buf, ':');
760 url->pattern.url_spec.port_list = strdup_or_die(p);
764 url->pattern.url_spec.port_list = NULL;
769 return compile_host_pattern(url, buf);
777 #ifdef FEATURE_EXTENDED_HOST_PATTERNS
778 /*********************************************************************
780 * Function : compile_host_pattern
782 * Description : Parses and compiles a host pattern.
785 * 1 : url = Target pattern_spec to be filled in.
786 * 2 : host_pattern = Host pattern to compile.
788 * Returns : JB_ERR_OK - Success
789 * JB_ERR_MEMORY - Out of memory
790 * JB_ERR_PARSE - Cannot parse regex
792 *********************************************************************/
793 static jb_err compile_host_pattern(struct pattern_spec *url, const char *host_pattern)
795 return compile_pattern(host_pattern, RIGHT_ANCHORED_HOST, url, &url->pattern.url_spec.host_regex);
800 /*********************************************************************
802 * Function : compile_host_pattern
804 * Description : Parses and "compiles" an old-school host pattern.
807 * 1 : url = Target pattern_spec to be filled in.
808 * 2 : host_pattern = Host pattern to parse.
810 * Returns : JB_ERR_OK - Success
811 * JB_ERR_PARSE - Cannot split the host pattern into components
813 *********************************************************************/
814 static jb_err compile_host_pattern(struct pattern_spec *url, const char *host_pattern)
823 if (host_pattern[strlen(host_pattern) - 1] == '.')
825 url->pattern.url_spec.unanchored |= ANCHOR_RIGHT;
827 if (host_pattern[0] == '.')
829 url->pattern.url_spec.unanchored |= ANCHOR_LEFT;
833 * Split domain into components
835 url->pattern.url_spec.dbuffer = strdup_or_die(host_pattern);
840 for (p = url->pattern.url_spec.dbuffer; *p ; p++)
842 *p = (char)privoxy_tolower(*p);
846 * Split the domain name into components
848 url->pattern.url_spec.dcount = ssplit(url->pattern.url_spec.dbuffer, ".", v, SZ(v));
850 if (url->pattern.url_spec.dcount < 0)
852 free_pattern_spec(url);
855 else if (url->pattern.url_spec.dcount != 0)
858 * Save a copy of the pointers in dvec
860 size = (size_t)url->pattern.url_spec.dcount * sizeof(*url->pattern.url_spec.dvec);
862 url->pattern.url_spec.dvec = malloc_or_die(size);
864 memcpy(url->pattern.url_spec.dvec, v, size);
867 * else dcount == 0 in which case we needn't do anything,
868 * since dvec will never be accessed and the pattern will
875 /*********************************************************************
877 * Function : simplematch
879 * Description : String matching, with a (greedy) '*' wildcard that
880 * stands for zero or more arbitrary characters and
881 * character classes in [], which take both enumerations
885 * 1 : pattern = pattern for matching
886 * 2 : text = text to be matched
888 * Returns : 0 if match, else nonzero
890 *********************************************************************/
891 static int simplematch(const char *pattern, const char *text)
893 const unsigned char *pat = (const unsigned char *)pattern;
894 const unsigned char *txt = (const unsigned char *)text;
895 const unsigned char *fallback = pat;
898 unsigned char lastchar = 'a';
900 unsigned char charmap[32];
905 /* EOF pattern but !EOF text? */
918 /* '*' in the pattern? */
922 /* The pattern ends afterwards? Speed up the return. */
928 /* Else, set wildcard mode and remember position after '*' */
933 /* Character range specification? */
936 memset(charmap, '\0', sizeof(charmap));
938 while (*++pat != ']')
944 else if (*pat == '-')
946 if ((*++pat == ']') || *pat == '\0')
950 for (i = lastchar; i <= *pat; i++)
952 charmap[i / 8] |= (unsigned char)(1 << (i % 8));
957 charmap[*pat / 8] |= (unsigned char)(1 << (*pat % 8));
961 } /* -END- if Character range specification */
965 * Char match, or char range match?
969 || ((*pat == ']') && (charmap[*txt / 8] & (1 << (*txt % 8)))))
979 * No match && no wildcard: No luck
983 else if (pat != fallback)
986 * Increment text pointer if in char range matching
993 * Wildcard mode && nonmatch beyond fallback: Rewind pattern
997 * Restart matching from current text pointer
1004 /* Cut off extra '*'s */
1005 if (*pat == '*') pat++;
1007 /* If this is the pattern's end, fine! */
1013 /*********************************************************************
1015 * Function : simple_domaincmp
1017 * Description : Domain-wise Compare fqdn's. The comparison is
1018 * both left- and right-anchored. The individual
1019 * domain names are compared with simplematch().
1020 * This is only used by domain_match.
1023 * 1 : pv = array of patterns to compare
1024 * 2 : fv = array of domain components to compare
1025 * 3 : len = length of the arrays (both arrays are the
1026 * same length - if they weren't, it couldn't
1027 * possibly be a match).
1029 * Returns : 0 => domains are equivalent, else no match.
1031 *********************************************************************/
1032 static int simple_domaincmp(char **pv, char **fv, int len)
1036 for (n = 0; n < len; n++)
1038 if (simplematch(pv[n], fv[n]))
1049 /*********************************************************************
1051 * Function : domain_match
1053 * Description : Domain-wise Compare fqdn's. Governed by the bitmap in
1054 * p->pattern.url_spec.unanchored, the comparison is un-, left-,
1055 * right-anchored, or both.
1056 * The individual domain names are compared with
1060 * 1 : p = a domain that may contain a '*' as a wildcard.
1061 * 2 : fqdn = domain name against which the patterns are compared.
1063 * Returns : 0 => domains are equivalent, else no match.
1065 *********************************************************************/
1066 static int domain_match(const struct pattern_spec *p, const struct http_request *fqdn)
1068 char **pv, **fv; /* vectors */
1070 int unanchored = p->pattern.url_spec.unanchored & (ANCHOR_RIGHT | ANCHOR_LEFT);
1072 plen = p->pattern.url_spec.dcount;
1073 flen = fqdn->dcount;
1077 /* fqdn is too short to match this pattern */
1081 pv = p->pattern.url_spec.dvec;
1084 if (unanchored == ANCHOR_LEFT)
1089 * Convert this into a fully anchored pattern with
1090 * the fqdn and pattern the same length
1092 fv += (flen - plen); /* flen - plen >= 0 due to check above */
1093 return simple_domaincmp(pv, fv, plen);
1095 else if (unanchored == 0)
1097 /* Fully anchored, check length */
1102 return simple_domaincmp(pv, fv, plen);
1104 else if (unanchored == ANCHOR_RIGHT)
1106 /* Left anchored, ignore all extra in fqdn */
1107 return simple_domaincmp(pv, fv, plen);
1113 int maxn = flen - plen;
1114 for (n = 0; n <= maxn; n++)
1116 if (!simple_domaincmp(pv, fv, plen))
1121 * Doesn't match from start of fqdn
1122 * Try skipping first part of fqdn
1130 #endif /* def FEATURE_EXTENDED_HOST_PATTERNS */
1133 /*********************************************************************
1135 * Function : create_pattern_spec
1137 * Description : Creates a "pattern_spec" structure from a string.
1138 * When finished, free with free_pattern_spec().
1141 * 1 : pattern = Target pattern_spec to be filled in.
1142 * Will be zeroed before use.
1143 * 2 : buf = Source pattern, null terminated. NOTE: The
1144 * contents of this buffer are destroyed by this
1145 * function. If this function succeeds, the
1146 * buffer is copied to pattern->spec. If this
1147 * function fails, the contents of the buffer
1150 * Returns : JB_ERR_OK - Success
1151 * JB_ERR_PARSE - Cannot parse regex (Detailed message
1152 * written to system log)
1154 *********************************************************************/
1155 jb_err create_pattern_spec(struct pattern_spec *pattern, char *buf)
1159 /** The tag pattern prefix to match */
1162 /** The length of the prefix to match */
1163 const size_t prefix_length;
1165 /** The pattern flag */
1166 const unsigned flag;
1168 { "TAG:", 4, PATTERN_SPEC_TAG_PATTERN},
1169 #ifdef FEATURE_CLIENT_TAGS
1170 { "CLIENT-TAG:", 11, PATTERN_SPEC_CLIENT_TAG_PATTERN},
1172 { "NO-REQUEST-TAG:", 15, PATTERN_SPEC_NO_REQUEST_TAG_PATTERN},
1173 { "NO-RESPONSE-TAG:", 16, PATTERN_SPEC_NO_RESPONSE_TAG_PATTERN}
1180 memset(pattern, '\0', sizeof(*pattern));
1182 /* Remember the original specification for the CGI pages. */
1183 pattern->spec = strdup_or_die(buf);
1185 /* Check if it's a tag pattern */
1186 for (i = 0; i < SZ(tag_pattern); i++)
1188 if (0 == strncmpic(pattern->spec, tag_pattern[i].prefix, tag_pattern[i].prefix_length))
1190 /* The regex starts after the prefix */
1191 const char *tag_regex = buf + tag_pattern[i].prefix_length;
1193 pattern->flags |= tag_pattern[i].flag;
1195 return compile_pattern(tag_regex, NO_ANCHORING, pattern,
1196 &pattern->pattern.tag_regex);
1200 /* If it isn't a tag pattern it must be an URL pattern. */
1201 pattern->flags |= PATTERN_SPEC_URL_PATTERN;
1203 return compile_url_pattern(pattern, buf);
1208 /*********************************************************************
1210 * Function : free_pattern_spec
1212 * Description : Called from the "unloaders". Frees the pattern
1213 * structure elements.
1216 * 1 : pattern = pointer to a pattern_spec structure.
1220 *********************************************************************/
1221 void free_pattern_spec(struct pattern_spec *pattern)
1223 if (pattern == NULL) return;
1225 freez(pattern->spec);
1226 #ifdef FEATURE_EXTENDED_HOST_PATTERNS
1227 if (pattern->pattern.url_spec.host_regex)
1229 regfree(pattern->pattern.url_spec.host_regex);
1230 freez(pattern->pattern.url_spec.host_regex);
1233 freez(pattern->pattern.url_spec.dbuffer);
1234 freez(pattern->pattern.url_spec.dvec);
1235 pattern->pattern.url_spec.dcount = 0;
1236 #endif /* ndef FEATURE_EXTENDED_HOST_PATTERNS */
1237 freez(pattern->pattern.url_spec.port_list);
1238 if (pattern->pattern.url_spec.preg)
1240 regfree(pattern->pattern.url_spec.preg);
1241 freez(pattern->pattern.url_spec.preg);
1243 if (pattern->pattern.tag_regex)
1245 regfree(pattern->pattern.tag_regex);
1246 freez(pattern->pattern.tag_regex);
1251 /*********************************************************************
1253 * Function : port_matches
1255 * Description : Compares a port against a port list.
1258 * 1 : port = The port to check.
1259 * 2 : port_list = The list of ports to compare with.
1261 * Returns : TRUE for yes, FALSE otherwise.
1263 *********************************************************************/
1264 static int port_matches(const int port, const char *port_list)
1266 return ((NULL == port_list) || match_portlist(port_list, port));
1270 /*********************************************************************
1272 * Function : host_matches
1274 * Description : Compares a host against a host pattern.
1277 * 1 : http = The request whose host is matched
1278 * 2 : pattern = The URL pattern
1280 * Returns : TRUE for yes, FALSE otherwise.
1282 *********************************************************************/
1283 static int host_matches(const struct http_request *http,
1284 const struct pattern_spec *pattern)
1286 assert(http->host != NULL);
1287 #ifdef FEATURE_EXTENDED_HOST_PATTERNS
1288 return ((NULL == pattern->pattern.url_spec.host_regex)
1289 || (0 == regexec(pattern->pattern.url_spec.host_regex, http->host, 0, NULL, 0)));
1291 return ((NULL == pattern->pattern.url_spec.dbuffer) || (0 == domain_match(pattern, http)));
1296 /*********************************************************************
1298 * Function : path_matches
1300 * Description : Compares a path against a path pattern.
1303 * 1 : path = The path to match
1304 * 2 : pattern = The URL pattern
1306 * Returns : TRUE for yes, FALSE otherwise.
1308 *********************************************************************/
1309 static int path_matches(const char *path, const struct pattern_spec *pattern)
1311 return ((NULL == pattern->pattern.url_spec.preg)
1312 || (0 == regexec(pattern->pattern.url_spec.preg, path, 0, NULL, 0)));
1316 /*********************************************************************
1318 * Function : url_match
1320 * Description : Compare a URL against a URL pattern.
1323 * 1 : pattern = a URL pattern
1324 * 2 : http = The parsed request (port, host and path) to match
1326 * Returns : Nonzero if the URL matches the pattern, else 0.
1328 *********************************************************************/
1329 int url_match(const struct pattern_spec *pattern,
1330 const struct http_request *http)
1332 if (!(pattern->flags & PATTERN_SPEC_URL_PATTERN))
1334 /* It's not an URL pattern and thus shouldn't be matched against URLs */
1338 return (port_matches(http->port, pattern->pattern.url_spec.port_list)
1339 && host_matches(http, pattern) && path_matches(http->path, pattern));
1344 /*********************************************************************
1346 * Function : match_portlist
1348 * Description : Check if a given number is covered by a comma
1349 * separated list of numbers and ranges (a,b-c,d,..)
1352 * 1 : portlist = String with list
1353 * 2 : port = port to check
1355 * Returns : 0 => no match
1358 *********************************************************************/
1359 int match_portlist(const char *portlist, int port)
1361 char *min, *max, *next, *portlist_copy;
1363 min = portlist_copy = strdup_or_die(portlist);
1366 * Zero-terminate first item and remember offset for next
1368 if (NULL != (next = strchr(portlist_copy, (int) ',')))
1374 * Loop through all items, checking for match
1378 if (NULL == (max = strchr(min, (int) '-')))
1381 * No dash, check for equality
1383 if (port == atoi(min))
1385 freez(portlist_copy);
1392 * This is a range, so check if between min and max,
1393 * or, if max was omitted, between min and 65K
1396 if (port >= atoi(min) && port <= (atoi(max) ? atoi(max) : 65535))
1398 freez(portlist_copy);
1410 * Zero-terminate next item and remember offset for n+1
1412 if ((NULL != next) && (NULL != (next = strchr(next, (int) ','))))
1418 freez(portlist_copy);
1424 /*********************************************************************
1426 * Function : parse_forwarder_address
1428 * Description : Parse out the username, password, host and port from
1429 * a forwarder address.
1432 * 1 : address = The forwarder address to parse.
1433 * 2 : hostname = Used to return the hostname. NULL on error.
1434 * 3 : port = Used to return the port. Untouched if no port
1436 * 4 : username = Used to return the username if any.
1437 * 5 : password = Used to return the password if any.
1439 * Returns : JB_ERR_OK on success
1440 * JB_ERR_MEMORY on out of memory
1441 * JB_ERR_PARSE on malformed address.
1443 *********************************************************************/
1444 jb_err parse_forwarder_address(char *address, char **hostname, int *port,
1445 char **username, char **password)
1450 tmp = *hostname = strdup_or_die(address);
1452 /* Parse username and password */
1453 if (username && password && (NULL != (p = strchr(*hostname, '@'))))
1456 *username = strdup_or_die(*hostname);
1457 *hostname = strdup_or_die(p);
1459 if (NULL != (p = strchr(*username, ':')))
1462 *password = strdup_or_die(p);
1467 /* Parse hostname and port */
1469 if ((*p == '[') && (NULL == strchr(p, ']')))
1471 /* XXX: Should do some more validity checks here. */
1472 return JB_ERR_PARSE;
1475 if ((**hostname == '[') && (NULL != (p = strchr(*hostname, ']'))))
1478 memmove(*hostname, (*hostname + 1), (size_t)(p - *hostname));
1481 *port = (int)strtol(++p, NULL, 0);
1484 else if (NULL != (p = strchr(*hostname, ':')))
1487 *port = (int)strtol(p, NULL, 0);