1 const char urlmatch_rcs[] = "$Id: urlmatch.c,v 1.88 2016/03/17 10:40:53 fabiankeil Exp $";
2 /*********************************************************************
4 * File : $Source: /cvsroot/ijbswa/current/urlmatch.c,v $
6 * Purpose : Declares functions to match URLs against URL
9 * Copyright : Written by and Copyright (C) 2001-2014
10 * the Privoxy team. http://www.privoxy.org/
12 * Based on the Internet Junkbuster originally written
13 * by and Copyright (C) 1997 Anonymous Coders and
14 * Junkbusters Corporation. http://www.junkbusters.com
16 * This program is free software; you can redistribute it
17 * and/or modify it under the terms of the GNU General
18 * Public License as published by the Free Software
19 * Foundation; either version 2 of the License, or (at
20 * your option) any later version.
22 * This program is distributed in the hope that it will
23 * be useful, but WITHOUT ANY WARRANTY; without even the
24 * implied warranty of MERCHANTABILITY or FITNESS FOR A
25 * PARTICULAR PURPOSE. See the GNU General Public
26 * License for more details.
28 * The GNU General Public License should be included with
29 * this file. If not, you can view it at
30 * http://www.gnu.org/copyleft/gpl.html
31 * or write to the Free Software Foundation, Inc., 59
32 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
34 *********************************************************************/
41 #include <sys/types.h>
49 #if !defined(_WIN32) && !defined(__OS2__)
59 const char urlmatch_h_rcs[] = URLMATCH_H_VERSION;
68 static jb_err compile_host_pattern(struct pattern_spec *url, const char *host_pattern);
70 /*********************************************************************
72 * Function : free_http_request
74 * Description : Freez a http_request structure
77 * 1 : http = points to a http_request structure to free
81 *********************************************************************/
82 void free_http_request(struct http_request *http)
91 freez(http->hostport);
94 freez(http->host_ip_addr_str);
95 #ifndef FEATURE_EXTENDED_HOST_PATTERNS
103 #ifndef FEATURE_EXTENDED_HOST_PATTERNS
104 /*********************************************************************
106 * Function : init_domain_components
108 * Description : Splits the domain name so we can compare it
109 * against wildcards. It used to be part of
110 * parse_http_url, but was separated because the
111 * same code is required in chat in case of
112 * intercepted requests.
115 * 1 : http = pointer to the http structure to hold elements.
117 * Returns : JB_ERR_OK on success
118 * JB_ERR_PARSE if the host has no domain components
119 * or more than SZ(vec) (BUFFER_SIZE) of them.
121 *********************************************************************/
122 jb_err init_domain_components(struct http_request *http)
124 char *vec[BUFFER_SIZE];
128 http->dbuffer = strdup_or_die(http->host);
130 /* map to lower case */
131 for (p = http->dbuffer; *p ; p++)
133 *p = (char)privoxy_tolower(*p);
136 /* split the domain name into components */
137 http->dcount = ssplit(http->dbuffer, ".", vec, SZ(vec));
139 if (http->dcount <= 0)
142 * Error: More than SZ(vec) components in domain
143 * or: no components in domain
145 log_error(LOG_LEVEL_ERROR, "More than SZ(vec) components in domain or none at all.");
149 /* save a copy of the pointers in dvec */
150 size = (size_t)http->dcount * sizeof(*http->dvec);
152 http->dvec = malloc_or_die(size);
154 memcpy(http->dvec, vec, size);
158 #endif /* ndef FEATURE_EXTENDED_HOST_PATTERNS */
161 /*********************************************************************
163 * Function : url_requires_percent_encoding
165 * Description : Checks if an URL contains invalid characters
166 * according to RFC 3986 that should be percent-encoded.
167 * Does not verify whether or not the passed string
168 * actually is a valid URL.
171 * 1 : url = URL to check
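173 * Returns : True if the URL contains a character that should be percent-encoded, false otherwise (NOTE(review): inferred from the function name; confirm against the return statements)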
175 *********************************************************************/
176 int url_requires_percent_encoding(const char *url)
178 static const char allowed_characters[128] = {
179 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
180 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
181 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
182 '\0', '\0', '\0', '!', '\0', '#', '$', '%', '&', '\'',
183 '(', ')', '*', '+', ',', '-', '.', '/', '0', '1',
184 '2', '3', '4', '5', '6', '7', '8', '9', ':', ';',
185 '\0', '=', '\0', '?', '@', 'A', 'B', 'C', 'D', 'E',
186 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
187 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y',
188 'Z', '[', '\0', ']', '\0', '_', '\0', 'a', 'b', 'c',
189 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
190 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
191 'x', 'y', 'z', '\0', '\0', '\0', '~', '\0'
196 const unsigned int i = (unsigned char)*url++;
197 if (i >= sizeof(allowed_characters) || '\0' == allowed_characters[i])
208 /*********************************************************************
210 * Function : parse_http_url
212 * Description : Parse out the host and port from the URL. Find the
213 * hostname & path, port (if ':'), and/or password (if '@')
216 * 1 : url = URL (or is it URI?) to break down
217 * 2 : http = pointer to the http structure to hold elements.
218 * Must be initialized with valid values (like NULLs).
219 * 3 : require_protocol = Whether or not URLs without
220 * protocol are acceptable.
222 * Returns : JB_ERR_OK on success
223 * JB_ERR_PARSE on malformed command/URL
224 * or a host that init_domain_components() rejects.
226 *********************************************************************/
227 jb_err parse_http_url(const char *url, struct http_request *http, int require_protocol)
229 int host_available = 1; /* A proxy can dream. */
232 * Save our initial URL
234 http->url = strdup_or_die(url);
237 * Check for * URI. If found, we're done.
239 if (*http->url == '*')
241 http->path = strdup_or_die("*");
242 http->hostport = strdup_or_die("");
243 if (http->url[1] != '\0')
252 * Split URL into protocol,hostport,path.
259 buf = strdup_or_die(url);
261 /* Find the start of the URL in our scratch space */
263 if (strncmpic(url_noproto, "http://", 7) == 0)
267 else if (strncmpic(url_noproto, "https://", 8) == 0)
270 * Should only happen when called from cgi_show_url_info().
275 else if (*url_noproto == '/')
278 * Short request line without protocol and host.
279 * Most likely because the client's request
280 * was intercepted and redirected into Privoxy.
285 else if (require_protocol)
291 url_path = strchr(url_noproto, '/');
292 if (url_path != NULL)
297 * NOTE: The following line ignores the path for HTTPS URLs.
298 * This means that you get consistent behaviour if you type an
299 * https URL in and it's parsed by the function. (When the
300 * URL is actually retrieved, SSL hides the path part).
302 http->path = strdup_or_die(http->ssl ? "/" : url_path);
304 http->hostport = strdup_or_die(url_noproto);
309 * Repair broken HTTP requests that don't contain a path,
310 * or CONNECT requests
312 http->path = strdup_or_die("/");
313 http->hostport = strdup_or_die(url_noproto);
321 /* Without host, there is nothing left to do here */
326 * Split hostport into user/password (ignored), host, port.
333 buf = strdup_or_die(http->hostport);
335 /* check if url contains username and/or password */
336 host = strchr(buf, '@');
339 /* Contains username/password, skip it and the @ sign. */
344 /* No username or password. */
348 /* Move after hostname before port number */
351 /* Numeric IPv6 address delimited by brackets */
353 port = strchr(host, ']');
357 /* Missing closing bracket */
368 else if (*port != ':')
370 /* Garbage after closing bracket */
377 /* Plain non-escaped hostname */
378 port = strchr(host, ':');
381 /* check if url contains port */
387 /* Terminate hostname and point to start of port string */
389 parsed_port = strtol(port, &endptr, 10);
390 if ((parsed_port <= 0) || (parsed_port > 65535) || (*endptr != '\0'))
392 log_error(LOG_LEVEL_ERROR, "Invalid port in URL: %s.", url);
396 http->port = (int)parsed_port;
400 /* No port specified. */
401 http->port = (http->ssl ? 443 : 80);
404 http->host = strdup_or_die(host);
409 #ifdef FEATURE_EXTENDED_HOST_PATTERNS
412 /* Split domain name so we can compare it against wildcards */
413 return init_domain_components(http);
414 #endif /* def FEATURE_EXTENDED_HOST_PATTERNS */
419 /*********************************************************************
421 * Function : unknown_method
423 * Description : Checks whether a method is unknown.
426 * 1 : method = points to a http method
428 * Returns : TRUE if it's unknown, FALSE otherwise.
430 *********************************************************************/
431 static int unknown_method(const char *method)
433 static const char * const known_http_methods[] = {
434 /* Basic HTTP request type */
435 "GET", "HEAD", "POST", "PUT", "DELETE", "OPTIONS", "TRACE", "CONNECT",
436 /* webDAV extensions (RFC2518) */
437 "PROPFIND", "PROPPATCH", "MOVE", "COPY", "MKCOL", "LOCK", "UNLOCK",
439 * Microsoft webDAV extension for Exchange 2000. See:
440 * http://lists.w3.org/Archives/Public/w3c-dist-auth/2002JanMar/0001.html
441 * http://msdn.microsoft.com/library/en-us/wss/wss/_webdav_methods.asp
443 "BCOPY", "BMOVE", "BDELETE", "BPROPFIND", "BPROPPATCH",
445 * Another Microsoft webDAV extension for Exchange 2000. See:
446 * http://systems.cs.colorado.edu/grunwald/MobileComputing/Papers/draft-cohen-gena-p-base-00.txt
447 * http://lists.w3.org/Archives/Public/w3c-dist-auth/2002JanMar/0001.html
448 * http://msdn.microsoft.com/library/en-us/wss/wss/_webdav_methods.asp
450 "SUBSCRIBE", "UNSUBSCRIBE", "NOTIFY", "POLL",
452 * Yet another WebDAV extension, this time for
453 * Web Distributed Authoring and Versioning (RFC3253)
455 "VERSION-CONTROL", "REPORT", "CHECKOUT", "CHECKIN", "UNCHECKOUT",
456 "MKWORKSPACE", "UPDATE", "LABEL", "MERGE", "BASELINE-CONTROL", "MKACTIVITY",
458 * The PATCH method is defined by RFC5789, the format of the
459 * actual patch in the body depends on the application, but from
460 * Privoxy's point of view it doesn't matter.
466 for (i = 0; i < SZ(known_http_methods); i++)
468 if (0 == strcmpic(method, known_http_methods[i]))
479 /*********************************************************************
481 * Function : normalize_http_version
483 * Description : Take a supported HTTP version string and remove
484 * leading zeroes etc., reject unsupported versions.
486 * This is an explicit RFC 2616 (3.1) MUST and
487 * RFC 7230 mandates that intermediaries send their
488 * own HTTP-version in forwarded messages.
491 * 1 : http_version = HTTP version string
493 * Returns : JB_ERR_OK on success
494 * JB_ERR_PARSE if the HTTP version is unsupported
496 *********************************************************************/
497 jb_err static normalize_http_version(char *http_version)
499 unsigned int major_version;
500 unsigned int minor_version;
502 if (2 != sscanf(http_version, "HTTP/%u.%u", &major_version, &minor_version))
504 log_error(LOG_LEVEL_ERROR, "Unsupported HTTP version: %s", http_version);
508 if (major_version != 1 || (minor_version != 0 && minor_version != 1))
510 log_error(LOG_LEVEL_ERROR, "The only supported HTTP "
511 "versions are 1.0 and 1.1. This rules out: %s", http_version);
515 assert(strlen(http_version) >= 8);
516 snprintf(http_version, 9, "HTTP/%u.%u", major_version, minor_version);
523 /*********************************************************************
525 * Function : parse_http_request
527 * Description : Split an HTTP request line into method, URL and
528 * version, check method and version, then parse the URL.
531 * 1 : req = HTTP request line to break down
532 * 2 : http = pointer to the http structure to hold elements
534 * Returns : JB_ERR_OK on success
535 * JB_ERR_CGI_PARAMS on malformed command/URL
536 * or >100 domains deep.
538 *********************************************************************/
539 jb_err parse_http_request(const char *req, struct http_request *http)
546 memset(http, '\0', sizeof(*http));
548 buf = strdup_or_die(req);
550 n = ssplit(buf, " \r\n", v, SZ(v));
558 * Fail in case of unknown methods
559 * which we might not handle correctly.
561 * XXX: There should be a config option
562 * to forward requests with unknown methods
563 * anyway. Most of them don't need special
566 if (unknown_method(v[0]))
568 log_error(LOG_LEVEL_ERROR, "Unknown HTTP method detected: %s", v[0]);
573 if (JB_ERR_OK != normalize_http_version(v[2]))
579 http->ssl = !strcmpic(v[0], "CONNECT");
581 err = parse_http_url(v[1], http, !http->ssl);
589 * Copy the details into the structure
591 http->cmd = strdup_or_die(req);
592 http->gpc = strdup_or_die(v[0]);
593 http->ver = strdup_or_die(v[2]);
594 http->ocmd = strdup_or_die(http->cmd);
603 /*********************************************************************
605 * Function : compile_pattern
607 * Description : Compiles a host, domain or TAG pattern.
610 * 1 : pattern = The pattern to compile.
611 * 2 : anchoring = How the regex should be modified
612 * before compilation. Can be either
613 * one of NO_ANCHORING, LEFT_ANCHORED,
614 * RIGHT_ANCHORED or RIGHT_ANCHORED_HOST.
615 * 3 : url = In case of failures, the spec member is
616 * logged and the structure freed.
617 * 4 : regex = Where the compiled regex should be stored.
619 * Returns : JB_ERR_OK - Success
620 * JB_ERR_PARSE - Cannot parse regex
622 *********************************************************************/
623 static jb_err compile_pattern(const char *pattern, enum regex_anchoring anchoring,
624 struct pattern_spec *url, regex_t **regex)
627 const char *fmt = NULL;
633 if (pattern[0] == '\0')
647 case RIGHT_ANCHORED_HOST:
654 log_error(LOG_LEVEL_FATAL,
655 "Invalid anchoring in compile_pattern %d", anchoring);
657 rebuf_size = strlen(pattern) + strlen(fmt);
658 rebuf = malloc_or_die(rebuf_size);
659 *regex = zalloc_or_die(sizeof(**regex));
661 snprintf(rebuf, rebuf_size, fmt, pattern);
663 errcode = regcomp(*regex, rebuf, (REG_EXTENDED|REG_NOSUB|REG_ICASE));
667 size_t errlen = regerror(errcode, *regex, rebuf, rebuf_size);
668 if (errlen > (rebuf_size - (size_t)1))
670 errlen = rebuf_size - (size_t)1;
672 rebuf[errlen] = '\0';
673 log_error(LOG_LEVEL_ERROR, "error compiling %s from %s: %s",
674 pattern, url->spec, rebuf);
675 free_pattern_spec(url);
687 /*********************************************************************
689 * Function : compile_url_pattern
691 * Description : Compiles the three parts of an URL pattern.
694 * 1 : url = Target pattern_spec to be filled in.
695 * 2 : buf = The url pattern to compile. Will be messed up.
697 * Returns : JB_ERR_OK - Success
698 * JB_ERR_MEMORY - Out of memory
699 * JB_ERR_PARSE - Cannot parse regex
701 *********************************************************************/
702 static jb_err compile_url_pattern(struct pattern_spec *url, char *buf)
706 p = strchr(buf, '/');
710 * Only compile the regex if it consists of more than
711 * a single slash, otherwise it wouldn't affect the result.
716 * XXX: does it make sense to compile the slash at the beginning?
718 jb_err err = compile_pattern(p, LEFT_ANCHORED, url, &url->pattern.url_spec.preg);
720 if (JB_ERR_OK != err)
729 * IPv6 numeric hostnames can contain colons, thus we need
730 * to delimit the hostname before the real port separator.
731 * As brackets are already used in the hostname pattern,
732 * we use angle brackets ('<', '>') instead.
734 if ((buf[0] == '<') && (NULL != (p = strchr(buf + 1, '>'))))
741 /* IPv6 address without port number */
746 /* Garbage after address delimiter */
752 p = strchr(buf, ':');
758 url->pattern.url_spec.port_list = strdup_or_die(p);
762 url->pattern.url_spec.port_list = NULL;
767 return compile_host_pattern(url, buf);
775 #ifdef FEATURE_EXTENDED_HOST_PATTERNS
776 /*********************************************************************
778 * Function : compile_host_pattern
780 * Description : Parses and compiles a host pattern.
783 * 1 : url = Target pattern_spec to be filled in.
784 * 2 : host_pattern = Host pattern to compile.
786 * Returns : JB_ERR_OK - Success
787 * JB_ERR_MEMORY - Out of memory
788 * JB_ERR_PARSE - Cannot parse regex
790 *********************************************************************/
791 static jb_err compile_host_pattern(struct pattern_spec *url, const char *host_pattern)
793 return compile_pattern(host_pattern, RIGHT_ANCHORED_HOST, url, &url->pattern.url_spec.host_regex);
798 /*********************************************************************
800 * Function : compile_host_pattern
802 * Description : Parses and "compiles" an old-school host pattern.
805 * 1 : url = Target pattern_spec to be filled in.
806 * 2 : host_pattern = Host pattern to parse.
808 * Returns : JB_ERR_OK - Success
809 * JB_ERR_PARSE - Cannot parse regex
811 *********************************************************************/
812 static jb_err compile_host_pattern(struct pattern_spec *url, const char *host_pattern)
821 if (host_pattern[strlen(host_pattern) - 1] == '.')
823 url->pattern.url_spec.unanchored |= ANCHOR_RIGHT;
825 if (host_pattern[0] == '.')
827 url->pattern.url_spec.unanchored |= ANCHOR_LEFT;
831 * Split domain into components
833 url->pattern.url_spec.dbuffer = strdup_or_die(host_pattern);
838 for (p = url->pattern.url_spec.dbuffer; *p ; p++)
840 *p = (char)privoxy_tolower(*p);
844 * Split the domain name into components
846 url->pattern.url_spec.dcount = ssplit(url->pattern.url_spec.dbuffer, ".", v, SZ(v));
848 if (url->pattern.url_spec.dcount < 0)
850 free_pattern_spec(url);
853 else if (url->pattern.url_spec.dcount != 0)
856 * Save a copy of the pointers in dvec
858 size = (size_t)url->pattern.url_spec.dcount * sizeof(*url->pattern.url_spec.dvec);
860 url->pattern.url_spec.dvec = malloc_or_die(size);
862 memcpy(url->pattern.url_spec.dvec, v, size);
865 * else dcount == 0 in which case we needn't do anything,
866 * since dvec will never be accessed and the pattern will
873 /*********************************************************************
875 * Function : simplematch
877 * Description : String matching, with a (greedy) '*' wildcard that
878 * stands for zero or more arbitrary characters and
879 * character classes in [], which take both enumerations
883 * 1 : pattern = pattern for matching
884 * 2 : text = text to be matched
886 * Returns : 0 if match, else nonzero
888 *********************************************************************/
889 static int simplematch(const char *pattern, const char *text)
891 const unsigned char *pat = (const unsigned char *)pattern;
892 const unsigned char *txt = (const unsigned char *)text;
893 const unsigned char *fallback = pat;
896 unsigned char lastchar = 'a';
898 unsigned char charmap[32];
903 /* EOF pattern but !EOF text? */
916 /* '*' in the pattern? */
920 /* The pattern ends afterwards? Speed up the return. */
926 /* Else, set wildcard mode and remember position after '*' */
931 /* Character range specification? */
934 memset(charmap, '\0', sizeof(charmap));
936 while (*++pat != ']')
942 else if (*pat == '-')
944 if ((*++pat == ']') || *pat == '\0')
948 for (i = lastchar; i <= *pat; i++)
950 charmap[i / 8] |= (unsigned char)(1 << (i % 8));
955 charmap[*pat / 8] |= (unsigned char)(1 << (*pat % 8));
959 } /* -END- if Character range specification */
963 * Char match, or char range match?
967 || ((*pat == ']') && (charmap[*txt / 8] & (1 << (*txt % 8)))))
977 * No match && no wildcard: No luck
981 else if (pat != fallback)
984 * Increment text pointer if in char range matching
991 * Wildcard mode && nonmatch beyond fallback: Rewind pattern
995 * Restart matching from current text pointer
1002 /* Cut off extra '*'s */
1003 if (*pat == '*') pat++;
1005 /* If this is the pattern's end, fine! */
1011 /*********************************************************************
1013 * Function : simple_domaincmp
1015 * Description : Domain-wise Compare fqdn's. The comparison is
1016 * both left- and right-anchored. The individual
1017 * domain names are compared with simplematch().
1018 * This is only used by domain_match.
1021 * 1 : pv = array of patterns to compare
1022 * 2 : fv = array of domain components to compare
1023 * 3 : len = length of the arrays (both arrays are the
1024 * same length - if they weren't, it couldn't
1025 * possibly be a match).
1027 * Returns : 0 => domains are equivalent, else no match.
1029 *********************************************************************/
1030 static int simple_domaincmp(char **pv, char **fv, int len)
1034 for (n = 0; n < len; n++)
1036 if (simplematch(pv[n], fv[n]))
1047 /*********************************************************************
1049 * Function : domain_match
1051 * Description : Domain-wise Compare fqdn's. Governed by the bitmap in
1052 * p->pattern.url_spec.unanchored, the comparison is un-, left-,
1053 * right-anchored, or both.
1054 * The individual domain names are compared with
1058 * 1 : p = a domain that may contain a '*' as a wildcard.
1059 * 2 : fqdn = domain name against which the patterns are compared.
1061 * Returns : 0 => domains are equivalent, else no match.
1063 *********************************************************************/
1064 static int domain_match(const struct pattern_spec *p, const struct http_request *fqdn)
1066 char **pv, **fv; /* vectors */
1068 int unanchored = p->pattern.url_spec.unanchored & (ANCHOR_RIGHT | ANCHOR_LEFT);
1070 plen = p->pattern.url_spec.dcount;
1071 flen = fqdn->dcount;
1075 /* fqdn is too short to match this pattern */
1079 pv = p->pattern.url_spec.dvec;
1082 if (unanchored == ANCHOR_LEFT)
1087 * Convert this into a fully anchored pattern with
1088 * the fqdn and pattern the same length
1090 fv += (flen - plen); /* flen - plen >= 0 due to check above */
1091 return simple_domaincmp(pv, fv, plen);
1093 else if (unanchored == 0)
1095 /* Fully anchored, check length */
1100 return simple_domaincmp(pv, fv, plen);
1102 else if (unanchored == ANCHOR_RIGHT)
1104 /* Left anchored, ignore all extra in fqdn */
1105 return simple_domaincmp(pv, fv, plen);
1111 int maxn = flen - plen;
1112 for (n = 0; n <= maxn; n++)
1114 if (!simple_domaincmp(pv, fv, plen))
1119 * Doesn't match from start of fqdn
1120 * Try skipping first part of fqdn
1128 #endif /* def FEATURE_EXTENDED_HOST_PATTERNS */
1131 /*********************************************************************
1133 * Function : create_pattern_spec
1135 * Description : Creates a "pattern_spec" structure from a string.
1136 * When finished, free with free_pattern_spec().
1139 * 1 : pattern = Target pattern_spec to be filled in.
1140 * Will be zeroed before use.
1141 * 2 : buf = Source pattern, null terminated. NOTE: The
1142 * contents of this buffer are destroyed by this
1143 * function. If this function succeeds, the
1144 * buffer is copied to pattern->spec. If this
1145 * function fails, the contents of the buffer
1148 * Returns : JB_ERR_OK - Success
1149 * JB_ERR_PARSE - Cannot parse regex (Detailed message
1150 * written to system log)
1152 *********************************************************************/
1153 jb_err create_pattern_spec(struct pattern_spec *pattern, char *buf)
1157 /** The tag pattern prefix to match */
1160 /** The length of the prefix to match */
1161 const size_t prefix_length;
1163 /** The pattern flag */
1164 const unsigned flag;
1166 { "TAG:", 4, PATTERN_SPEC_TAG_PATTERN},
1167 #ifdef FEATURE_CLIENT_TAGS
1168 { "CLIENT-TAG:", 11, PATTERN_SPEC_CLIENT_TAG_PATTERN},
1170 { "NO-REQUEST-TAG:", 15, PATTERN_SPEC_NO_REQUEST_TAG_PATTERN},
1171 { "NO-RESPONSE-TAG:", 16, PATTERN_SPEC_NO_RESPONSE_TAG_PATTERN}
1178 memset(pattern, '\0', sizeof(*pattern));
1180 /* Remember the original specification for the CGI pages. */
1181 pattern->spec = strdup_or_die(buf);
1183 /* Check if it's a tag pattern */
1184 for (i = 0; i < SZ(tag_pattern); i++)
1186 if (0 == strncmpic(pattern->spec, tag_pattern[i].prefix, tag_pattern[i].prefix_length))
1188 /* The regex starts after the prefix */
1189 const char *tag_regex = buf + tag_pattern[i].prefix_length;
1191 pattern->flags |= tag_pattern[i].flag;
1193 return compile_pattern(tag_regex, NO_ANCHORING, pattern,
1194 &pattern->pattern.tag_regex);
1198 /* If it isn't a tag pattern it must be an URL pattern. */
1199 pattern->flags |= PATTERN_SPEC_URL_PATTERN;
1201 return compile_url_pattern(pattern, buf);
1206 /*********************************************************************
1208 * Function : free_pattern_spec
1210 * Description : Called from the "unloaders". Freez the pattern
1211 * structure elements.
1214 * 1 : pattern = pointer to a pattern_spec structure.
1218 *********************************************************************/
1219 void free_pattern_spec(struct pattern_spec *pattern)
1221 if (pattern == NULL) return;
1223 freez(pattern->spec);
1224 #ifdef FEATURE_EXTENDED_HOST_PATTERNS
1225 if (pattern->pattern.url_spec.host_regex)
1227 regfree(pattern->pattern.url_spec.host_regex);
1228 freez(pattern->pattern.url_spec.host_regex);
1231 freez(pattern->pattern.url_spec.dbuffer);
1232 freez(pattern->pattern.url_spec.dvec);
1233 pattern->pattern.url_spec.dcount = 0;
1234 #endif /* ndef FEATURE_EXTENDED_HOST_PATTERNS */
1235 freez(pattern->pattern.url_spec.port_list);
1236 if (pattern->pattern.url_spec.preg)
1238 regfree(pattern->pattern.url_spec.preg);
1239 freez(pattern->pattern.url_spec.preg);
1241 if (pattern->pattern.tag_regex)
1243 regfree(pattern->pattern.tag_regex);
1244 freez(pattern->pattern.tag_regex);
1249 /*********************************************************************
1251 * Function : port_matches
1253 * Description : Compares a port against a port list.
1256 * 1 : port = The port to check.
1257 * 2 : port_list = The list of ports to compare with.
1259 * Returns : TRUE for yes, FALSE otherwise.
1261 *********************************************************************/
1262 static int port_matches(const int port, const char *port_list)
1264 return ((NULL == port_list) || match_portlist(port_list, port));
1268 /*********************************************************************
1270 * Function : host_matches
1272 * Description : Compares a host against a host pattern.
1275 * 1 : http = The request whose host is matched
1276 * 2 : pattern = The URL pattern
1278 * Returns : TRUE for yes, FALSE otherwise.
1280 *********************************************************************/
1281 static int host_matches(const struct http_request *http,
1282 const struct pattern_spec *pattern)
1284 assert(http->host != NULL);
1285 #ifdef FEATURE_EXTENDED_HOST_PATTERNS
1286 return ((NULL == pattern->pattern.url_spec.host_regex)
1287 || (0 == regexec(pattern->pattern.url_spec.host_regex, http->host, 0, NULL, 0)));
1289 return ((NULL == pattern->pattern.url_spec.dbuffer) || (0 == domain_match(pattern, http)));
1294 /*********************************************************************
1296 * Function : path_matches
1298 * Description : Compares a path against a path pattern.
1301 * 1 : path = The path to match
1302 * 2 : pattern = The URL pattern
1304 * Returns : TRUE for yes, FALSE otherwise.
1306 *********************************************************************/
1307 static int path_matches(const char *path, const struct pattern_spec *pattern)
1309 return ((NULL == pattern->pattern.url_spec.preg)
1310 || (0 == regexec(pattern->pattern.url_spec.preg, path, 0, NULL, 0)));
1314 /*********************************************************************
1316 * Function : url_match
1318 * Description : Compare a URL against a URL pattern.
1321 * 1 : pattern = a URL pattern
1322 * 2 : http = Parsed URL to match
1324 * Returns : Nonzero if the URL matches the pattern, else 0.
1326 *********************************************************************/
1327 int url_match(const struct pattern_spec *pattern,
1328 const struct http_request *http)
1330 if (!(pattern->flags & PATTERN_SPEC_URL_PATTERN))
1332 /* It's not an URL pattern and thus shouldn't be matched against URLs */
1336 return (port_matches(http->port, pattern->pattern.url_spec.port_list)
1337 && host_matches(http, pattern) && path_matches(http->path, pattern));
1342 /*********************************************************************
1344 * Function : match_portlist
1346 * Description : Check if a given number is covered by a comma
1347 * separated list of numbers and ranges (a,b-c,d,..)
1350 * 1 : portlist = String with list
1351 * 2 : port = port to check
1353 * Returns : 0 => no match
1356 *********************************************************************/
1357 int match_portlist(const char *portlist, int port)
1359 char *min, *max, *next, *portlist_copy;
1361 min = portlist_copy = strdup_or_die(portlist);
1364 * Zero-terminate first item and remember offset for next
1366 if (NULL != (next = strchr(portlist_copy, (int) ',')))
1372 * Loop through all items, checking for match
1376 if (NULL == (max = strchr(min, (int) '-')))
1379 * No dash, check for equality
1381 if (port == atoi(min))
1383 freez(portlist_copy);
1390 * This is a range, so check if between min and max,
1391 * or, if max was omitted, between min and 65535
1394 if (port >= atoi(min) && port <= (atoi(max) ? atoi(max) : 65535))
1396 freez(portlist_copy);
1408 * Zero-terminate next item and remember offset for n+1
1410 if ((NULL != next) && (NULL != (next = strchr(next, (int) ','))))
1416 freez(portlist_copy);
1422 /*********************************************************************
1424 * Function : parse_forwarder_address
1426 * Description : Parse out the host and port from a forwarder address.
1429 * 1 : address = The forwarder address to parse.
1430 * 2 : hostname = Used to return the hostname. NULL on error.
1431 * 3 : port = Used to return the port. Untouched if no port
1434 * Returns : JB_ERR_OK on success
1435 * JB_ERR_MEMORY on out of memory
1436 * JB_ERR_PARSE on malformed address.
1438 *********************************************************************/
1439 jb_err parse_forwarder_address(char *address, char **hostname, int *port)
1443 if ((*address == '[') && (NULL == strchr(address, ']')))
1445 /* XXX: Should do some more validity checks here. */
1446 return JB_ERR_PARSE;
1449 *hostname = strdup_or_die(address);
1451 if ((**hostname == '[') && (NULL != (p = strchr(*hostname, ']'))))
1454 memmove(*hostname, (*hostname + 1), (size_t)(p - *hostname));
1457 *port = (int)strtol(++p, NULL, 0);
1460 else if (NULL != (p = strchr(*hostname, ':')))
1463 *port = (int)strtol(p, NULL, 0);