1 /*********************************************************************
3 * File : $Source: /cvsroot/ijbswa/current/urlmatch.c,v $
5 * Purpose : Declares functions to match URLs against URL
8 * Copyright : Written by and Copyright (C) 2001-2020
9 * the Privoxy team. https://www.privoxy.org/
11 * Based on the Internet Junkbuster originally written
12 * by and Copyright (C) 1997 Anonymous Coders and
13 * Junkbusters Corporation. http://www.junkbusters.com
15 * This program is free software; you can redistribute it
16 * and/or modify it under the terms of the GNU General
17 * Public License as published by the Free Software
18 * Foundation; either version 2 of the License, or (at
19 * your option) any later version.
21 * This program is distributed in the hope that it will
22 * be useful, but WITHOUT ANY WARRANTY; without even the
23 * implied warranty of MERCHANTABILITY or FITNESS FOR A
24 * PARTICULAR PURPOSE. See the GNU General Public
25 * License for more details.
27 * The GNU General Public License should be included with
28 * this file. If not, you can view it at
29 * http://www.gnu.org/copyleft/gpl.html
30 * or write to the Free Software Foundation, Inc., 59
31 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
33 *********************************************************************/
40 #include <sys/types.h>
65 static jb_err compile_vanilla_host_pattern(struct pattern_spec *url, const char *host_pattern);
66 #ifdef FEATURE_PCRE_HOST_PATTERNS
67 static jb_err compile_pcre_host_pattern(struct pattern_spec *url, const char *host_pattern);
70 /*********************************************************************
72 * Function : free_http_request
74 * Description : Freez a http_request structure
77 * 1 : http = points to a http_request structure to free
81 *********************************************************************/
82 void free_http_request(struct http_request *http)
91 freez(http->hostport);
94 freez(http->host_ip_addr_str);
101 /*********************************************************************
103 * Function : init_domain_components
105 * Description : Splits the domain name so we can compare it
106 * against wildcards. It used to be part of
107 * parse_http_url, but was separated because the
108 * same code is required in chat in case of
109 * intercepted requests.
112 * 1 : http = pointer to the http structure to hold elements.
114 * Returns : JB_ERR_OK on success
115 * JB_ERR_PARSE on malformed command/URL
116 * or >100 domains deep.
118 *********************************************************************/
119 jb_err init_domain_components(struct http_request *http)
121 char *vec[BUFFER_SIZE];
125 http->dbuffer = strdup_or_die(http->host);
127 /* map to lower case */
128 for (p = http->dbuffer; *p ; p++)
130 *p = (char)privoxy_tolower(*p);
133 /* split the domain name into components */
134 http->dcount = ssplit(http->dbuffer, ".", vec, SZ(vec));
136 if (http->dcount <= 0)
139 * Error: More than SZ(vec) components in domain
140 * or: no components in domain
142 log_error(LOG_LEVEL_ERROR, "More than SZ(vec) components in domain or none at all.");
146 /* save a copy of the pointers in dvec */
147 size = (size_t)http->dcount * sizeof(*http->dvec);
149 http->dvec = malloc_or_die(size);
151 memcpy(http->dvec, vec, size);
157 /*********************************************************************
159 * Function : url_requires_percent_encoding
161 * Description : Checks if an URL contains invalid characters
162 * according to RFC 3986 that should be percent-encoded.
163 * Does not verify whether or not the passed string
164 * actually is a valid URL.
167 * 1 : url = URL to check
169 * Returns : True if the URL contains characters that should be percent-encoded, false otherwise
171 *********************************************************************/
172 int url_requires_percent_encoding(const char *url)
174 static const char allowed_characters[128] = {
175 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
176 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
177 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
178 '\0', '\0', '\0', '!', '\0', '#', '$', '%', '&', '\'',
179 '(', ')', '*', '+', ',', '-', '.', '/', '0', '1',
180 '2', '3', '4', '5', '6', '7', '8', '9', ':', ';',
181 '\0', '=', '\0', '?', '@', 'A', 'B', 'C', 'D', 'E',
182 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
183 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y',
184 'Z', '[', '\0', ']', '\0', '_', '\0', 'a', 'b', 'c',
185 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
186 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
187 'x', 'y', 'z', '\0', '\0', '\0', '~', '\0'
192 const unsigned int i = (unsigned char)*url++;
193 if (i >= sizeof(allowed_characters) || '\0' == allowed_characters[i])
204 /*********************************************************************
206 * Function : parse_http_url
208 * Description : Parse out the host and port from the URL. Find the
209 * hostname & path, port (if ':'), and/or password (if '@')
212 * 1 : url = URL (or is it URI?) to break down
213 * 2 : http = pointer to the http structure to hold elements.
214 * Must be initialized with valid values (like NULLs).
215 * 3 : require_protocol = Whether or not URLs without
216 * protocol are acceptable.
218 * Returns : JB_ERR_OK on success
219 * JB_ERR_PARSE on malformed command/URL
220 * or >100 domains deep.
222 *********************************************************************/
223 jb_err parse_http_url(const char *url, struct http_request *http, int require_protocol)
225 int host_available = 1; /* A proxy can dream. */
228 * Save our initial URL
230 http->url = strdup_or_die(url);
233 * Check for * URI. If found, we're done.
235 if (*http->url == '*')
237 http->path = strdup_or_die("*");
238 http->hostport = strdup_or_die("");
239 if (http->url[1] != '\0')
248 * Split URL into protocol,hostport,path.
255 buf = strdup_or_die(url);
257 /* Find the start of the URL in our scratch space */
259 if (strncmpic(url_noproto, "http://", 7) == 0)
263 else if (strncmpic(url_noproto, "https://", 8) == 0)
266 * Should only happen when called from cgi_show_url_info().
271 else if (*url_noproto == '/')
274 * Short request line without protocol and host.
275 * Most likely because the client's request
276 * was intercepted and redirected into Privoxy.
281 else if (require_protocol)
287 url_path = strchr(url_noproto, '/');
288 if (url_path != NULL)
293 * If FEATURE_HTTPS_INSPECTION isn't available, ignore the
294 * path for https URLs so that we get consistent behaviour
295 * if a https URL is parsed. When the URL is actually
296 * retrieved, https hides the path part.
298 http->path = strdup_or_die(
299 #ifndef FEATURE_HTTPS_INSPECTION
305 http->hostport = string_tolower(url_noproto);
310 * Repair broken HTTP requests that don't contain a path,
311 * or CONNECT requests
313 http->path = strdup_or_die("/");
314 http->hostport = string_tolower(url_noproto);
319 if (http->hostport == NULL)
327 /* Without host, there is nothing left to do here */
332 * Split hostport into user/password (ignored), host, port.
339 buf = strdup_or_die(http->hostport);
341 /* check if url contains username and/or password */
342 host = strchr(buf, '@');
345 /* Contains username/password, skip it and the @ sign. */
350 /* No username or password. */
354 /* Move after hostname before port number */
357 /* Numeric IPv6 address delimited by brackets */
359 port = strchr(host, ']');
363 /* Missing closing bracket */
374 else if (*port != ':')
376 /* Garbage after closing bracket */
383 /* Plain non-escaped hostname */
384 port = strchr(host, ':');
387 /* check if url contains port */
393 /* Terminate hostname and point to start of port string */
395 parsed_port = strtol(port, &endptr, 10);
396 if ((parsed_port <= 0) || (parsed_port > 65535) || (*endptr != '\0'))
398 log_error(LOG_LEVEL_ERROR, "Invalid port in URL: %s.", url);
402 http->port = (int)parsed_port;
406 /* No port specified. */
407 http->port = (http->ssl ? 443 : 80);
410 http->host = strdup_or_die(host);
415 /* Split domain name so we can compare it against wildcards */
416 return init_domain_components(http);
421 /*********************************************************************
423 * Function : unknown_method
425 * Description : Checks whether a method is unknown.
428 * 1 : method = points to a http method
430 * Returns : TRUE if it's unknown, FALSE otherwise.
432 *********************************************************************/
433 static int unknown_method(const char *method)
435 static const char * const known_http_methods[] = {
436 /* Basic HTTP request type */
437 "GET", "HEAD", "POST", "PUT", "DELETE", "OPTIONS", "TRACE", "CONNECT",
438 /* webDAV extensions (RFC2518) */
439 "PROPFIND", "PROPPATCH", "MOVE", "COPY", "MKCOL", "LOCK", "UNLOCK",
441 * Microsoft webDAV extension for Exchange 2000. See:
442 * http://lists.w3.org/Archives/Public/w3c-dist-auth/2002JanMar/0001.html
443 * http://msdn.microsoft.com/library/en-us/wss/wss/_webdav_methods.asp
445 "BCOPY", "BMOVE", "BDELETE", "BPROPFIND", "BPROPPATCH",
447 * Another Microsoft webDAV extension for Exchange 2000. See:
448 * http://systems.cs.colorado.edu/grunwald/MobileComputing/Papers/draft-cohen-gena-p-base-00.txt
449 * http://lists.w3.org/Archives/Public/w3c-dist-auth/2002JanMar/0001.html
450 * http://msdn.microsoft.com/library/en-us/wss/wss/_webdav_methods.asp
452 "SUBSCRIBE", "UNSUBSCRIBE", "NOTIFY", "POLL",
454 * Yet another WebDAV extension, this time for
455 * Web Distributed Authoring and Versioning (RFC3253)
457 "VERSION-CONTROL", "REPORT", "CHECKOUT", "CHECKIN", "UNCHECKOUT",
458 "MKWORKSPACE", "UPDATE", "LABEL", "MERGE", "BASELINE-CONTROL", "MKACTIVITY",
460 * The PATCH method is defined by RFC5789, the format of the
461 * actual patch in the body depends on the application, but from
462 * Privoxy's point of view it doesn't matter.
468 for (i = 0; i < SZ(known_http_methods); i++)
470 if (0 == strcmpic(method, known_http_methods[i]))
481 /*********************************************************************
483 * Function : normalize_http_version
485 * Description : Take a supported HTTP version string and remove
486 * leading zeroes etc., reject unsupported versions.
488 * This is an explicit RFC 2616 (3.1) MUST and
489 * RFC 7230 mandates that intermediaries send their
490 * own HTTP-version in forwarded messages.
493 * 1 : http_version = HTTP version string
495 * Returns : JB_ERR_OK on success
496 * JB_ERR_PARSE if the HTTP version is unsupported
498 *********************************************************************/
499 static jb_err normalize_http_version(char *http_version)
501 unsigned int major_version;
502 unsigned int minor_version;
504 if (2 != sscanf(http_version, "HTTP/%u.%u", &major_version, &minor_version))
506 log_error(LOG_LEVEL_ERROR, "Unsupported HTTP version: %s", http_version);
510 if (major_version != 1 || (minor_version != 0 && minor_version != 1))
512 log_error(LOG_LEVEL_ERROR, "The only supported HTTP "
513 "versions are 1.0 and 1.1. This rules out: %s", http_version);
517 assert(strlen(http_version) >= 8);
518 snprintf(http_version, 9, "HTTP/%u.%u", major_version, minor_version);
525 /*********************************************************************
527 * Function : parse_http_request
529 * Description : Parse out the host and port from the URL. Find the
530 * hostname & path, port (if ':'), and/or password (if '@')
533 * 1 : req = HTTP request line to break down
534 * 2 : http = pointer to the http structure to hold elements
536 * Returns : JB_ERR_OK on success
537 * JB_ERR_CGI_PARAMS on malformed command/URL
538 * or >100 domains deep.
540 *********************************************************************/
541 jb_err parse_http_request(const char *req, struct http_request *http)
548 memset(http, '\0', sizeof(*http));
550 buf = strdup_or_die(req);
552 n = ssplit(buf, " \r\n", v, SZ(v));
560 * Fail in case of unknown methods
561 * which we might not handle correctly.
563 * XXX: There should be a config option
564 * to forward requests with unknown methods
565 * anyway. Most of them don't need special
568 if (unknown_method(v[0]))
570 log_error(LOG_LEVEL_ERROR, "Unknown HTTP method detected: %s", v[0]);
575 if (JB_ERR_OK != normalize_http_version(v[2]))
581 http->ssl = !strcmpic(v[0], "CONNECT");
583 err = parse_http_url(v[1], http, !http->ssl);
591 * Copy the details into the structure
593 http->cmd = strdup_or_die(req);
594 http->gpc = strdup_or_die(v[0]);
595 http->version = strdup_or_die(v[2]);
596 http->ocmd = strdup_or_die(http->cmd);
605 /*********************************************************************
607 * Function : compile_pattern
609 * Description : Compiles a host, domain or TAG pattern.
612 * 1 : pattern = The pattern to compile.
613 * 2 : anchoring = How the regex should be modified
614 * before compilation. Can be either
615 * one of NO_ANCHORING, LEFT_ANCHORED,
616 * RIGHT_ANCHORED or RIGHT_ANCHORED_HOST.
617 * 3 : url = In case of failures, the spec member is
618 * logged and the structure freed.
619 * 4 : regex = Where the compiled regex should be stored.
621 * Returns : JB_ERR_OK - Success
622 * JB_ERR_PARSE - Cannot parse regex
624 *********************************************************************/
625 static jb_err compile_pattern(const char *pattern, enum regex_anchoring anchoring,
626 struct pattern_spec *url, regex_t **regex)
629 const char *fmt = NULL;
635 if (pattern[0] == '\0')
649 case RIGHT_ANCHORED_HOST:
656 log_error(LOG_LEVEL_FATAL,
657 "Invalid anchoring in compile_pattern %d", anchoring);
659 rebuf_size = strlen(pattern) + strlen(fmt);
660 rebuf = malloc_or_die(rebuf_size);
661 *regex = zalloc_or_die(sizeof(**regex));
663 snprintf(rebuf, rebuf_size, fmt, pattern);
665 errcode = regcomp(*regex, rebuf, (REG_EXTENDED|REG_NOSUB|REG_ICASE));
669 size_t errlen = regerror(errcode, *regex, rebuf, rebuf_size);
670 if (errlen > (rebuf_size - (size_t)1))
672 errlen = rebuf_size - (size_t)1;
674 rebuf[errlen] = '\0';
675 log_error(LOG_LEVEL_ERROR, "error compiling %s from %s: %s",
676 pattern, url->spec, rebuf);
677 free_pattern_spec(url);
689 /*********************************************************************
691 * Function : compile_url_pattern
693 * Description : Compiles the three parts of an URL pattern.
696 * 1 : url = Target pattern_spec to be filled in.
697 * 2 : buf = The url pattern to compile. Will be messed up.
699 * Returns : JB_ERR_OK - Success
700 * JB_ERR_MEMORY - Out of memory
701 * JB_ERR_PARSE - Cannot parse regex
703 *********************************************************************/
704 static jb_err compile_url_pattern(struct pattern_spec *url, char *buf)
708 #ifdef FEATURE_PCRE_HOST_PATTERNS
709 const size_t prefix_length = 18;
710 if (strncmpic(buf, "PCRE-HOST-PATTERN:", prefix_length) == 0)
712 url->pattern.url_spec.host_regex_type = PCRE_HOST_PATTERN;
713 /* Overwrite the "PCRE-HOST-PATTERN:" prefix */
714 memmove(buf, buf+prefix_length, strlen(buf+prefix_length)+1);
718 url->pattern.url_spec.host_regex_type = VANILLA_HOST_PATTERN;
722 p = strchr(buf, '/');
726 * Only compile the regex if it consists of more than
727 * a single slash, otherwise it wouldn't affect the result.
732 * XXX: does it make sense to compile the slash at the beginning?
734 jb_err err = compile_pattern(p, LEFT_ANCHORED, url, &url->pattern.url_spec.preg);
736 if (JB_ERR_OK != err)
745 * IPv6 numeric hostnames can contain colons, thus we need
746 * to delimit the hostname before the real port separator.
747 * As brackets are already used in the hostname pattern,
748 * we use angle brackets ('<', '>') instead.
750 if ((buf[0] == '<') && (NULL != (p = strchr(buf + 1, '>'))))
757 /* IPv6 address without port number */
762 /* Garbage after address delimiter */
768 p = strchr(buf, ':');
774 url->pattern.url_spec.port_list = strdup_or_die(p);
778 url->pattern.url_spec.port_list = NULL;
783 #ifdef FEATURE_PCRE_HOST_PATTERNS
784 if (url->pattern.url_spec.host_regex_type == PCRE_HOST_PATTERN)
786 return compile_pcre_host_pattern(url, buf);
791 return compile_vanilla_host_pattern(url, buf);
800 #ifdef FEATURE_PCRE_HOST_PATTERNS
801 /*********************************************************************
803 * Function : compile_pcre_host_pattern
805 * Description : Parses and compiles a pcre host pattern.
808 * 1 : url = Target pattern_spec to be filled in.
809 * 2 : host_pattern = Host pattern to compile.
811 * Returns : JB_ERR_OK - Success
812 * JB_ERR_MEMORY - Out of memory
813 * JB_ERR_PARSE - Cannot parse regex
815 *********************************************************************/
816 static jb_err compile_pcre_host_pattern(struct pattern_spec *url, const char *host_pattern)
818 return compile_pattern(host_pattern, RIGHT_ANCHORED_HOST, url, &url->pattern.url_spec.host_regex);
820 #endif /* def FEATURE_PCRE_HOST_PATTERNS */
823 /*********************************************************************
825 * Function : compile_vanilla_host_pattern
827 * Description : Parses and "compiles" an old-school host pattern.
830 * 1 : url = Target pattern_spec to be filled in.
831 * 2 : host_pattern = Host pattern to parse.
833 * Returns : JB_ERR_OK - Success
834 * JB_ERR_PARSE - Cannot parse regex
836 *********************************************************************/
837 static jb_err compile_vanilla_host_pattern(struct pattern_spec *url, const char *host_pattern)
846 if (host_pattern[strlen(host_pattern) - 1] == '.')
848 url->pattern.url_spec.unanchored |= ANCHOR_RIGHT;
850 if (host_pattern[0] == '.')
852 url->pattern.url_spec.unanchored |= ANCHOR_LEFT;
856 * Split domain into components
858 url->pattern.url_spec.dbuffer = strdup_or_die(host_pattern);
863 for (p = url->pattern.url_spec.dbuffer; *p ; p++)
865 *p = (char)privoxy_tolower(*p);
869 * Split the domain name into components
871 url->pattern.url_spec.dcount = ssplit(url->pattern.url_spec.dbuffer, ".", v, SZ(v));
873 if (url->pattern.url_spec.dcount < 0)
875 free_pattern_spec(url);
878 else if (url->pattern.url_spec.dcount != 0)
881 * Save a copy of the pointers in dvec
883 size = (size_t)url->pattern.url_spec.dcount * sizeof(*url->pattern.url_spec.dvec);
885 url->pattern.url_spec.dvec = malloc_or_die(size);
887 memcpy(url->pattern.url_spec.dvec, v, size);
890 * else dcount == 0 in which case we needn't do anything,
891 * since dvec will never be accessed and the pattern will
898 /*********************************************************************
900 * Function : simplematch
902 * Description : String matching, with a (greedy) '*' wildcard that
903 * stands for zero or more arbitrary characters and
904 * character classes in [], which take both enumerations
908 * 1 : pattern = pattern for matching
909 * 2 : text = text to be matched
911 * Returns : 0 if match, else nonzero
913 *********************************************************************/
914 static int simplematch(const char *pattern, const char *text)
916 const unsigned char *pat = (const unsigned char *)pattern;
917 const unsigned char *txt = (const unsigned char *)text;
918 const unsigned char *fallback = pat;
921 unsigned char lastchar = 'a';
923 unsigned char charmap[32];
928 /* EOF pattern but !EOF text? */
941 /* '*' in the pattern? */
945 /* The pattern ends afterwards? Speed up the return. */
951 /* Else, set wildcard mode and remember position after '*' */
956 /* Character range specification? */
959 memset(charmap, '\0', sizeof(charmap));
961 while (*++pat != ']')
967 else if (*pat == '-')
969 if ((*++pat == ']') || *pat == '\0')
973 for (i = lastchar; i <= *pat; i++)
975 charmap[i / 8] |= (unsigned char)(1 << (i % 8));
980 charmap[*pat / 8] |= (unsigned char)(1 << (*pat % 8));
984 } /* -END- if Character range specification */
988 * Char match, or char range match?
992 || ((*pat == ']') && (charmap[*txt / 8] & (1 << (*txt % 8)))))
1002 * No match && no wildcard: No luck
1006 else if (pat != fallback)
1009 * Increment text pointer if in char range matching
1016 * Wildcard mode && nonmatch beyond fallback: Rewind pattern
1020 * Restart matching from current text pointer
1027 /* Cut off extra '*'s */
1028 if (*pat == '*') pat++;
1030 /* If this is the pattern's end, fine! */
1036 /*********************************************************************
1038 * Function : simple_domaincmp
1040 * Description : Domain-wise Compare fqdn's. The comparison is
1041 * both left- and right-anchored. The individual
1042 * domain names are compared with simplematch().
1043 * This is only used by domain_match.
1046 * 1 : pv = array of patterns to compare
1047 * 2 : fv = array of domain components to compare
1048 * 3 : len = length of the arrays (both arrays are the
1049 * same length - if they weren't, it couldn't
1050 * possibly be a match).
1052 * Returns : 0 => domains are equivalent, else no match.
1054 *********************************************************************/
1055 static int simple_domaincmp(char **pv, char **fv, int len)
1059 for (n = 0; n < len; n++)
1061 if (simplematch(pv[n], fv[n]))
1072 /*********************************************************************
1074 * Function : domain_match
1076 * Description : Domain-wise Compare fqdn's. Governed by the bitmap in
1077 * p->pattern.url_spec.unanchored, the comparison is un-, left-,
1078 * right-anchored, or both.
1079 * The individual domain names are compared with
1083 * 1 : p = a domain that may contain a '*' as a wildcard.
1084 * 2 : fqdn = domain name against which the patterns are compared.
1086 * Returns : 0 => domains are equivalent, else no match.
1088 *********************************************************************/
1089 static int domain_match(const struct pattern_spec *p, const struct http_request *fqdn)
1091 char **pv, **fv; /* vectors */
1093 int unanchored = p->pattern.url_spec.unanchored & (ANCHOR_RIGHT | ANCHOR_LEFT);
1095 plen = p->pattern.url_spec.dcount;
1096 flen = fqdn->dcount;
1100 /* fqdn is too short to match this pattern */
1104 pv = p->pattern.url_spec.dvec;
1107 if (unanchored == ANCHOR_LEFT)
1112 * Convert this into a fully anchored pattern with
1113 * the fqdn and pattern the same length
1115 fv += (flen - plen); /* flen - plen >= 0 due to check above */
1116 return simple_domaincmp(pv, fv, plen);
1118 else if (unanchored == 0)
1120 /* Fully anchored, check length */
1125 return simple_domaincmp(pv, fv, plen);
1127 else if (unanchored == ANCHOR_RIGHT)
1129 /* Left anchored, ignore all extra in fqdn */
1130 return simple_domaincmp(pv, fv, plen);
1136 int maxn = flen - plen;
1137 for (n = 0; n <= maxn; n++)
1139 if (!simple_domaincmp(pv, fv, plen))
1144 * Doesn't match from start of fqdn
1145 * Try skipping first part of fqdn
1155 /*********************************************************************
1157 * Function : create_pattern_spec
1159 * Description : Creates a "pattern_spec" structure from a string.
1160 * When finished, free with free_pattern_spec().
1163 * 1 : pattern = Target pattern_spec to be filled in.
1164 * Will be zeroed before use.
1165 * 2 : buf = Source pattern, null terminated. NOTE: The
1166 * contents of this buffer are destroyed by this
1167 * function. If this function succeeds, the
1168 * buffer is copied to pattern->spec. If this
1169 * function fails, the contents of the buffer
1172 * Returns : JB_ERR_OK - Success
1173 * JB_ERR_PARSE - Cannot parse regex (Detailed message
1174 * written to system log)
1176 *********************************************************************/
1177 jb_err create_pattern_spec(struct pattern_spec *pattern, char *buf)
1181 /** The tag pattern prefix to match */
1184 /** The length of the prefix to match */
1185 const size_t prefix_length;
1187 /** The pattern flag */
1188 const unsigned flag;
1190 { "TAG:", 4, PATTERN_SPEC_TAG_PATTERN},
1191 #ifdef FEATURE_CLIENT_TAGS
1192 { "CLIENT-TAG:", 11, PATTERN_SPEC_CLIENT_TAG_PATTERN},
1194 { "NO-REQUEST-TAG:", 15, PATTERN_SPEC_NO_REQUEST_TAG_PATTERN},
1195 { "NO-RESPONSE-TAG:", 16, PATTERN_SPEC_NO_RESPONSE_TAG_PATTERN}
1202 memset(pattern, '\0', sizeof(*pattern));
1204 /* Remember the original specification for the CGI pages. */
1205 pattern->spec = strdup_or_die(buf);
1207 /* Check if it's a tag pattern */
1208 for (i = 0; i < SZ(tag_pattern); i++)
1210 if (0 == strncmpic(pattern->spec, tag_pattern[i].prefix, tag_pattern[i].prefix_length))
1212 /* The regex starts after the prefix */
1213 const char *tag_regex = buf + tag_pattern[i].prefix_length;
1215 pattern->flags |= tag_pattern[i].flag;
1217 return compile_pattern(tag_regex, NO_ANCHORING, pattern,
1218 &pattern->pattern.tag_regex);
1222 /* If it isn't a tag pattern it must be an URL pattern. */
1223 pattern->flags |= PATTERN_SPEC_URL_PATTERN;
1225 return compile_url_pattern(pattern, buf);
1230 /*********************************************************************
1232 * Function : free_pattern_spec
1234 * Description : Called from the "unloaders". Freez the pattern
1235 * structure elements.
1238 * 1 : pattern = pointer to a pattern_spec structure.
1242 *********************************************************************/
1243 void free_pattern_spec(struct pattern_spec *pattern)
1245 if (pattern == NULL) return;
1247 freez(pattern->spec);
1248 #ifdef FEATURE_PCRE_HOST_PATTERNS
1249 if (pattern->pattern.url_spec.host_regex)
1251 regfree(pattern->pattern.url_spec.host_regex);
1252 freez(pattern->pattern.url_spec.host_regex);
1254 #endif /* def FEATURE_PCRE_HOST_PATTERNS */
1255 freez(pattern->pattern.url_spec.dbuffer);
1256 freez(pattern->pattern.url_spec.dvec);
1257 pattern->pattern.url_spec.dcount = 0;
1258 freez(pattern->pattern.url_spec.port_list);
1259 if (pattern->pattern.url_spec.preg)
1261 regfree(pattern->pattern.url_spec.preg);
1262 freez(pattern->pattern.url_spec.preg);
1264 if (pattern->pattern.tag_regex)
1266 regfree(pattern->pattern.tag_regex);
1267 freez(pattern->pattern.tag_regex);
1272 /*********************************************************************
1274 * Function : port_matches
1276 * Description : Compares a port against a port list.
1279 * 1 : port = The port to check.
1280 * 2 : port_list = The list of port to compare with.
1282 * Returns : TRUE for yes, FALSE otherwise.
1284 *********************************************************************/
1285 static int port_matches(const int port, const char *port_list)
1287 return ((NULL == port_list) || match_portlist(port_list, port));
1291 /*********************************************************************
1293 * Function : host_matches
1295 * Description : Compares a host against a host pattern.
1298 * 1 : http = The URL to match
1299 * 2 : pattern = The URL pattern
1301 * Returns : TRUE for yes, FALSE otherwise.
1303 *********************************************************************/
1304 static int host_matches(const struct http_request *http,
1305 const struct pattern_spec *pattern)
1307 assert(http->host != NULL);
1308 #ifdef FEATURE_PCRE_HOST_PATTERNS
1309 if (pattern->pattern.url_spec.host_regex_type == PCRE_HOST_PATTERN)
1311 return ((NULL == pattern->pattern.url_spec.host_regex)
1312 || (0 == regexec(pattern->pattern.url_spec.host_regex,
1313 http->host, 0, NULL, 0)));
1316 return ((NULL == pattern->pattern.url_spec.dbuffer) || (0 == domain_match(pattern, http)));
1320 /*********************************************************************
1322 * Function : path_matches
1324 * Description : Compares a path against a path pattern.
1327 * 1 : path = The path to match
1328 * 2 : pattern = The URL pattern
1330 * Returns : TRUE for yes, FALSE otherwise.
1332 *********************************************************************/
1333 static int path_matches(const char *path, const struct pattern_spec *pattern)
1335 return ((NULL == pattern->pattern.url_spec.preg)
1336 || (0 == regexec(pattern->pattern.url_spec.preg, path, 0, NULL, 0)));
1340 /*********************************************************************
1342 * Function : url_match
1344 * Description : Compare a URL against a URL pattern.
1347 * 1 : pattern = a URL pattern
1348 * 2 : http = URL to match
1350 * Returns : Nonzero if the URL matches the pattern, else 0.
1352 *********************************************************************/
1353 int url_match(const struct pattern_spec *pattern,
1354 const struct http_request *http)
1356 if (!(pattern->flags & PATTERN_SPEC_URL_PATTERN))
1358 /* It's not an URL pattern and thus shouldn't be matched against URLs */
1362 return (port_matches(http->port, pattern->pattern.url_spec.port_list)
1363 && host_matches(http, pattern) && path_matches(http->path, pattern));
1368 /*********************************************************************
1370 * Function : match_portlist
1372 * Description : Check if a given number is covered by a comma
1373 * separated list of numbers and ranges (a,b-c,d,..)
1376 * 1 : portlist = String with list
1377 * 2 : port = port to check
1379 * Returns : 0 => no match
1382 *********************************************************************/
1383 int match_portlist(const char *portlist, int port)
1385 char *min, *max, *next, *portlist_copy;
1387 min = portlist_copy = strdup_or_die(portlist);
1390 * Zero-terminate first item and remember offset for next
1392 if (NULL != (next = strchr(portlist_copy, (int) ',')))
1398 * Loop through all items, checking for match
1402 if (NULL == (max = strchr(min, (int) '-')))
1405 * No dash, check for equality
1407 if (port == atoi(min))
1409 freez(portlist_copy);
1416 * This is a range, so check if between min and max,
1417 * or, if max was omitted, between min and 65535
1420 if (port >= atoi(min) && port <= (atoi(max) ? atoi(max) : 65535))
1422 freez(portlist_copy);
1434 * Zero-terminate next item and remember offset for n+1
1436 if ((NULL != next) && (NULL != (next = strchr(next, (int) ','))))
1442 freez(portlist_copy);
1448 /*********************************************************************
1450 * Function : parse_forwarder_address
1452 * Description : Parse out the username, password, host and port from
1453 * a forwarder address.
1456 * 1 : address = The forwarder address to parse.
1457 * 2 : hostname = Used to return the hostname. NULL on error.
1458 * 3 : port = Used to return the port. Untouched if no port
1460 * 4 : username = Used to return the username if any.
1461 * 5 : password = Used to return the password if any.
1463 * Returns : JB_ERR_OK on success
1464 * JB_ERR_MEMORY on out of memory
1465 * JB_ERR_PARSE on malformed address.
1467 *********************************************************************/
1468 jb_err parse_forwarder_address(char *address, char **hostname, int *port,
1469 char **username, char **password)
1474 tmp = *hostname = strdup_or_die(address);
1476 /* Parse username and password */
1477 if (username && password && (NULL != (p = strchr(*hostname, '@'))))
1480 *username = strdup_or_die(*hostname);
1481 *hostname = strdup_or_die(p);
1483 if (NULL != (p = strchr(*username, ':')))
1486 *password = strdup_or_die(p);
1491 /* Parse hostname and port */
1493 if ((*p == '[') && (NULL == strchr(p, ']')))
1495 /* XXX: Should do some more validity checks here. */
1496 return JB_ERR_PARSE;
1499 if ((**hostname == '[') && (NULL != (p = strchr(*hostname, ']'))))
1502 memmove(*hostname, (*hostname + 1), (size_t)(p - *hostname));
1505 *port = (int)strtol(++p, NULL, 0);
1508 else if (NULL != (p = strchr(*hostname, ':')))
1511 *port = (int)strtol(p, NULL, 0);