1 const char urlmatch_rcs[] = "$Id: urlmatch.c,v 1.72 2012/07/23 12:42:53 fabiankeil Exp $";
2 /*********************************************************************
4 * File : $Source: /cvsroot/ijbswa/current/urlmatch.c,v $
6 * Purpose : Declares functions to match URLs against URL
9 * Copyright : Written by and Copyright (C) 2001-2011
10 * the Privoxy team. http://www.privoxy.org/
12 * Based on the Internet Junkbuster originally written
13 * by and Copyright (C) 1997 Anonymous Coders and
14 * Junkbusters Corporation. http://www.junkbusters.com
16 * This program is free software; you can redistribute it
17 * and/or modify it under the terms of the GNU General
18 * Public License as published by the Free Software
19 * Foundation; either version 2 of the License, or (at
20 * your option) any later version.
22 * This program is distributed in the hope that it will
23 * be useful, but WITHOUT ANY WARRANTY; without even the
24 * implied warranty of MERCHANTABILITY or FITNESS FOR A
25 * PARTICULAR PURPOSE. See the GNU General Public
26 * License for more details.
28 * The GNU General Public License should be included with
29 * this file. If not, you can view it at
30 * http://www.gnu.org/copyleft/gpl.html
31 * or write to the Free Software Foundation, Inc., 59
32 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
34 *********************************************************************/
41 #include <sys/types.h>
49 #if !defined(_WIN32) && !defined(__OS2__)
59 const char urlmatch_h_rcs[] = URLMATCH_H_VERSION;
68 static jb_err compile_host_pattern(struct url_spec *url, const char *host_pattern);
70 /*********************************************************************
72 * Function : free_http_request
74 * Description : Freez a http_request structure
77 * 1 : http = points to a http_request structure to free
81 *********************************************************************/
82 void free_http_request(struct http_request *http)
91 freez(http->hostport);
94 freez(http->host_ip_addr_str);
95 #ifndef FEATURE_EXTENDED_HOST_PATTERNS
103 #ifndef FEATURE_EXTENDED_HOST_PATTERNS
104 /*********************************************************************
106 * Function : init_domain_components
108 * Description : Splits the domain name so we can compare it
109 * against wildcards. It used to be part of
110 * parse_http_url, but was separated because the
111 * same code is required in chat in case of
112 * intercepted requests.
115 * 1 : http = pointer to the http structure to hold elements.
117 * Returns : JB_ERR_OK on success
118 * JB_ERR_PARSE on malformed command/URL
119 * or >100 domains deep.
121 *********************************************************************/
122 jb_err init_domain_components(struct http_request *http)
124 char *vec[BUFFER_SIZE];
128 http->dbuffer = strdup_or_die(http->host);
130 /* map to lower case */
131 for (p = http->dbuffer; *p ; p++)
133 *p = (char)privoxy_tolower(*p);
136 /* split the domain name into components */
137 http->dcount = ssplit(http->dbuffer, ".", vec, SZ(vec));
139 if (http->dcount <= 0)
142 * Error: More than SZ(vec) components in domain
143 * or: no components in domain
145 log_error(LOG_LEVEL_ERROR, "More than SZ(vec) components in domain or none at all.");
149 /* save a copy of the pointers in dvec */
150 size = (size_t)http->dcount * sizeof(*http->dvec);
152 http->dvec = malloc_or_die(size);
154 memcpy(http->dvec, vec, size);
158 #endif /* ndef FEATURE_EXTENDED_HOST_PATTERNS */
161 /*********************************************************************
163 * Function : url_requires_percent_encoding
165 * Description : Checks if an URL contains invalid characters
166 * according to RFC 3986 that should be percent-encoded.
167 * Does not verify whether or not the passed string
168 * actually is a valid URL.
171 * 1 : url = URL to check
173 * Returns : True if percent-encoding is required, false otherwise
175 *********************************************************************/
176 int url_requires_percent_encoding(const char *url)
178 static const char allowed_characters[128] = {
179 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
180 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
181 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
182 '\0', '\0', '\0', '!', '\0', '#', '$', '%', '&', '\'',
183 '(', ')', '*', '+', ',', '-', '.', '/', '0', '1',
184 '2', '3', '4', '5', '6', '7', '8', '9', ':', ';',
185 '\0', '=', '\0', '?', '@', 'A', 'B', 'C', 'D', 'E',
186 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
187 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y',
188 'Z', '[', '\0', ']', '\0', '_', '\0', 'a', 'b', 'c',
189 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
190 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
191 'x', 'y', 'z', '\0', '\0', '\0', '~', '\0'
196 const unsigned int i = (unsigned char)*url++;
197 if (i >= sizeof(allowed_characters) || '\0' == allowed_characters[i])
208 /*********************************************************************
210 * Function : parse_http_url
212 * Description : Parse out the host and port from the URL. Find the
213 * hostname & path, port (if ':'), and/or password (if '@')
216 * 1 : url = URL (or is it URI?) to break down
217 * 2 : http = pointer to the http structure to hold elements.
218 * Must be initialized with valid values (like NULLs).
219 * 3 : require_protocol = Whether or not URLs without
220 * protocol are acceptable.
222 * Returns : JB_ERR_OK on success
223 * JB_ERR_PARSE on malformed command/URL
224 * or >100 domains deep.
226 *********************************************************************/
227 jb_err parse_http_url(const char *url, struct http_request *http, int require_protocol)
229 int host_available = 1; /* A proxy can dream. */
232 * Save our initial URL
234 http->url = strdup_or_die(url);
237 * Check for * URI. If found, we're done.
239 if (*http->url == '*')
241 http->path = strdup_or_die("*");
242 http->hostport = strdup_or_die("");
243 if (http->url[1] != '\0')
252 * Split URL into protocol,hostport,path.
259 buf = strdup_or_die(url);
261 /* Find the start of the URL in our scratch space */
263 if (strncmpic(url_noproto, "http://", 7) == 0)
267 else if (strncmpic(url_noproto, "https://", 8) == 0)
270 * Should only happen when called from cgi_show_url_info().
275 else if (*url_noproto == '/')
278 * Short request line without protocol and host.
279 * Most likely because the client's request
280 * was intercepted and redirected into Privoxy.
285 else if (require_protocol)
291 url_path = strchr(url_noproto, '/');
292 if (url_path != NULL)
297 * NOTE: The following line ignores the path for HTTPS URLS.
298 * This means that you get consistent behaviour if you type a
299 * https URL in and it's parsed by the function. (When the
300 * URL is actually retrieved, SSL hides the path part).
302 http->path = strdup_or_die(http->ssl ? "/" : url_path);
304 http->hostport = strdup_or_die(url_noproto);
309 * Repair broken HTTP requests that don't contain a path,
310 * or CONNECT requests
312 http->path = strdup_or_die("/");
313 http->hostport = strdup_or_die(url_noproto);
321 /* Without host, there is nothing left to do here */
326 * Split hostport into user/password (ignored), host, port.
333 buf = strdup_or_die(http->hostport);
335 /* check if url contains username and/or password */
336 host = strchr(buf, '@');
339 /* Contains username/password, skip it and the @ sign. */
344 /* No username or password. */
348 /* Move after hostname before port number */
351 /* Numeric IPv6 address delimited by brackets */
353 port = strchr(host, ']');
357 /* Missing closing bracket */
368 else if (*port != ':')
370 /* Garbage after closing bracket */
377 /* Plain non-escaped hostname */
378 port = strchr(host, ':');
381 /* check if url contains port */
385 /* Terminate hostname and point to start of port string */
387 http->port = atoi(port);
391 /* No port specified. */
392 http->port = (http->ssl ? 443 : 80);
395 http->host = strdup_or_die(host);
400 #ifdef FEATURE_EXTENDED_HOST_PATTERNS
403 /* Split domain name so we can compare it against wildcards */
404 return init_domain_components(http);
405 #endif /* def FEATURE_EXTENDED_HOST_PATTERNS */
410 /*********************************************************************
412 * Function : unknown_method
414 * Description : Checks whether a method is unknown.
417 * 1 : method = points to a http method
419 * Returns : TRUE if it's unknown, FALSE otherwise.
421 *********************************************************************/
422 static int unknown_method(const char *method)
424 static const char * const known_http_methods[] = {
425 /* Basic HTTP request type */
426 "GET", "HEAD", "POST", "PUT", "DELETE", "OPTIONS", "TRACE", "CONNECT",
427 /* webDAV extensions (RFC2518) */
428 "PROPFIND", "PROPPATCH", "MOVE", "COPY", "MKCOL", "LOCK", "UNLOCK",
430 * Microsoft webDAV extension for Exchange 2000. See:
431 * http://lists.w3.org/Archives/Public/w3c-dist-auth/2002JanMar/0001.html
432 * http://msdn.microsoft.com/library/en-us/wss/wss/_webdav_methods.asp
434 "BCOPY", "BMOVE", "BDELETE", "BPROPFIND", "BPROPPATCH",
436 * Another Microsoft webDAV extension for Exchange 2000. See:
437 * http://systems.cs.colorado.edu/grunwald/MobileComputing/Papers/draft-cohen-gena-p-base-00.txt
438 * http://lists.w3.org/Archives/Public/w3c-dist-auth/2002JanMar/0001.html
439 * http://msdn.microsoft.com/library/en-us/wss/wss/_webdav_methods.asp
441 "SUBSCRIBE", "UNSUBSCRIBE", "NOTIFY", "POLL",
443 * Yet another WebDAV extension, this time for
444 * Web Distributed Authoring and Versioning (RFC3253)
446 "VERSION-CONTROL", "REPORT", "CHECKOUT", "CHECKIN", "UNCHECKOUT",
447 "MKWORKSPACE", "UPDATE", "LABEL", "MERGE", "BASELINE-CONTROL", "MKACTIVITY",
451 for (i = 0; i < SZ(known_http_methods); i++)
453 if (0 == strcmpic(method, known_http_methods[i]))
464 /*********************************************************************
466 * Function : parse_http_request
468 * Description : Split an HTTP request line into method, URL and version,
469 * check method and version, and parse the URL with parse_http_url().
472 * 1 : req = HTTP request line to break down
473 * 2 : http = pointer to the http structure to hold elements
475 * Returns : JB_ERR_OK on success
476 * JB_ERR_CGI_PARAMS on malformed command/URL
477 * or >100 domains deep.
479 *********************************************************************/
480 jb_err parse_http_request(const char *req, struct http_request *http)
483 char *v[10]; /* XXX: Why 10? We should only need three. */
487 memset(http, '\0', sizeof(*http));
489 buf = strdup_or_die(req);
491 n = ssplit(buf, " \r\n", v, SZ(v));
499 * Fail in case of unknown methods
500 * which we might not handle correctly.
502 * XXX: There should be a config option
503 * to forward requests with unknown methods
504 * anyway. Most of them don't need special
507 if (unknown_method(v[0]))
509 log_error(LOG_LEVEL_ERROR, "Unknown HTTP method detected: %s", v[0]);
514 if (strcmpic(v[2], "HTTP/1.1") && strcmpic(v[2], "HTTP/1.0"))
516 log_error(LOG_LEVEL_ERROR, "The only supported HTTP "
517 "versions are 1.0 and 1.1. This rules out: %s", v[2]);
522 http->ssl = !strcmpic(v[0], "CONNECT");
524 err = parse_http_url(v[1], http, !http->ssl);
532 * Copy the details into the structure
534 http->cmd = strdup_or_die(req);
535 http->gpc = strdup_or_die(v[0]);
536 http->ver = strdup_or_die(v[2]);
545 /*********************************************************************
547 * Function : compile_pattern
549 * Description : Compiles a host, domain or TAG pattern.
552 * 1 : pattern = The pattern to compile.
553 * 2 : anchoring = How the regex should be modified
554 * before compilation. Can be either
555 * one of NO_ANCHORING, LEFT_ANCHORED,
556 * RIGHT_ANCHORED or RIGHT_ANCHORED_HOST.
557 * 3 : url = In case of failures, the spec member is
558 * logged and the structure freed.
559 * 4 : regex = Where the compiled regex should be stored.
561 * Returns : JB_ERR_OK - Success
562 * JB_ERR_MEMORY - Out of memory
563 * JB_ERR_PARSE - Cannot parse regex
565 *********************************************************************/
566 static jb_err compile_pattern(const char *pattern, enum regex_anchoring anchoring,
567 struct url_spec *url, regex_t **regex)
570 char rebuf[BUFFER_SIZE];
571 const char *fmt = NULL;
574 assert(strlen(pattern) < sizeof(rebuf) - 2);
576 if (pattern[0] == '\0')
590 case RIGHT_ANCHORED_HOST:
597 log_error(LOG_LEVEL_FATAL,
598 "Invalid anchoring in compile_pattern %d", anchoring);
601 *regex = zalloc(sizeof(**regex));
605 return JB_ERR_MEMORY;
608 snprintf(rebuf, sizeof(rebuf), fmt, pattern);
610 errcode = regcomp(*regex, rebuf, (REG_EXTENDED|REG_NOSUB|REG_ICASE));
614 size_t errlen = regerror(errcode, *regex, rebuf, sizeof(rebuf));
615 if (errlen > (sizeof(rebuf) - (size_t)1))
617 errlen = sizeof(rebuf) - (size_t)1;
619 rebuf[errlen] = '\0';
620 log_error(LOG_LEVEL_ERROR, "error compiling %s from %s: %s",
621 pattern, url->spec, rebuf);
632 /*********************************************************************
634 * Function : compile_url_pattern
636 * Description : Compiles the three parts of an URL pattern.
639 * 1 : url = Target url_spec to be filled in.
640 * 2 : buf = The url pattern to compile. Will be messed up.
642 * Returns : JB_ERR_OK - Success
643 * JB_ERR_MEMORY - Out of memory
644 * JB_ERR_PARSE - Cannot parse regex
646 *********************************************************************/
647 static jb_err compile_url_pattern(struct url_spec *url, char *buf)
651 p = strchr(buf, '/');
655 * Only compile the regex if it consists of more than
656 * a single slash, otherwise it wouldn't affect the result.
661 * XXX: does it make sense to compile the slash at the beginning?
663 jb_err err = compile_pattern(p, LEFT_ANCHORED, url, &url->preg);
665 if (JB_ERR_OK != err)
674 * IPv6 numeric hostnames can contain colons, thus we need
675 * to delimit the hostname before the real port separator.
676 * As brackets are already used in the hostname pattern,
677 * we use angle brackets ('<', '>') instead.
679 if ((buf[0] == '<') && (NULL != (p = strchr(buf + 1, '>'))))
686 /* IPv6 address without port number */
691 /* Garbage after address delimiter */
697 p = strchr(buf, ':');
703 url->port_list = strdup_or_die(p);
707 url->port_list = NULL;
712 return compile_host_pattern(url, buf);
720 #ifdef FEATURE_EXTENDED_HOST_PATTERNS
721 /*********************************************************************
723 * Function : compile_host_pattern
725 * Description : Parses and compiles a host pattern.
728 * 1 : url = Target url_spec to be filled in.
729 * 2 : host_pattern = Host pattern to compile.
731 * Returns : JB_ERR_OK - Success
732 * JB_ERR_MEMORY - Out of memory
733 * JB_ERR_PARSE - Cannot parse regex
735 *********************************************************************/
736 static jb_err compile_host_pattern(struct url_spec *url, const char *host_pattern)
738 return compile_pattern(host_pattern, RIGHT_ANCHORED_HOST, url, &url->host_regex);
743 /*********************************************************************
745 * Function : compile_host_pattern
747 * Description : Parses and "compiles" an old-school host pattern.
750 * 1 : url = Target url_spec to be filled in.
751 * 2 : host_pattern = Host pattern to parse.
753 * Returns : JB_ERR_OK - Success
754 * JB_ERR_PARSE - Cannot parse regex
756 *********************************************************************/
757 static jb_err compile_host_pattern(struct url_spec *url, const char *host_pattern)
766 if (host_pattern[strlen(host_pattern) - 1] == '.')
768 url->unanchored |= ANCHOR_RIGHT;
770 if (host_pattern[0] == '.')
772 url->unanchored |= ANCHOR_LEFT;
776 * Split domain into components
778 url->dbuffer = strdup_or_die(host_pattern);
783 for (p = url->dbuffer; *p ; p++)
785 *p = (char)privoxy_tolower(*p);
789 * Split the domain name into components
791 url->dcount = ssplit(url->dbuffer, ".", v, SZ(v));
796 return JB_ERR_MEMORY;
798 else if (url->dcount != 0)
801 * Save a copy of the pointers in dvec
803 size = (size_t)url->dcount * sizeof(*url->dvec);
805 url->dvec = malloc_or_die(size);
807 memcpy(url->dvec, v, size);
810 * else dcount == 0 in which case we needn't do anything,
811 * since dvec will never be accessed and the pattern will
818 /*********************************************************************
820 * Function : simplematch
822 * Description : String matching, with a (greedy) '*' wildcard that
823 * stands for zero or more arbitrary characters and
824 * character classes in [], which take both enumerations
828 * 1 : pattern = pattern for matching
829 * 2 : text = text to be matched
831 * Returns : 0 if match, else nonzero
833 *********************************************************************/
834 static int simplematch(const char *pattern, const char *text)
836 const unsigned char *pat = (const unsigned char *)pattern;
837 const unsigned char *txt = (const unsigned char *)text;
838 const unsigned char *fallback = pat;
841 unsigned char lastchar = 'a';
843 unsigned char charmap[32];
848 /* EOF pattern but !EOF text? */
861 /* '*' in the pattern? */
865 /* The pattern ends afterwards? Speed up the return. */
871 /* Else, set wildcard mode and remember position after '*' */
876 /* Character range specification? */
879 memset(charmap, '\0', sizeof(charmap));
881 while (*++pat != ']')
887 else if (*pat == '-')
889 if ((*++pat == ']') || *pat == '\0')
893 for (i = lastchar; i <= *pat; i++)
895 charmap[i / 8] |= (unsigned char)(1 << (i % 8));
900 charmap[*pat / 8] |= (unsigned char)(1 << (*pat % 8));
904 } /* -END- if Character range specification */
908 * Char match, or char range match?
912 || ((*pat == ']') && (charmap[*txt / 8] & (1 << (*txt % 8)))))
922 * No match && no wildcard: No luck
926 else if (pat != fallback)
929 * Increment text pointer if in char range matching
936 * Wildcard mode && nonmatch beyond fallback: Rewind pattern
940 * Restart matching from current text pointer
947 /* Cut off extra '*'s */
948 if (*pat == '*') pat++;
950 /* If this is the pattern's end, fine! */
956 /*********************************************************************
958 * Function : simple_domaincmp
960 * Description : Domain-wise Compare fqdn's. The comparison is
961 * both left- and right-anchored. The individual
962 * domain names are compared with simplematch().
963 * This is only used by domain_match.
966 * 1 : pv = array of patterns to compare
967 * 2 : fv = array of domain components to compare
968 * 3 : len = length of the arrays (both arrays are the
969 * same length - if they weren't, it couldn't
970 * possibly be a match).
972 * Returns : 0 => domains are equivalent, else no match.
974 *********************************************************************/
975 static int simple_domaincmp(char **pv, char **fv, int len)
979 for (n = 0; n < len; n++)
981 if (simplematch(pv[n], fv[n]))
992 /*********************************************************************
994 * Function : domain_match
996 * Description : Domain-wise Compare fqdn's. Governed by the bitmap in
997 * pattern->unanchored, the comparison is un-, left-,
998 * right-anchored, or both.
999 * The individual domain names are compared with
1003 * 1 : pattern = a domain that may contain a '*' as a wildcard.
1004 * 2 : fqdn = domain name against which the patterns are compared.
1006 * Returns : 0 => domains are equivalent, else no match.
1008 *********************************************************************/
1009 static int domain_match(const struct url_spec *pattern, const struct http_request *fqdn)
1011 char **pv, **fv; /* vectors */
1013 int unanchored = pattern->unanchored & (ANCHOR_RIGHT | ANCHOR_LEFT);
1015 plen = pattern->dcount;
1016 flen = fqdn->dcount;
1020 /* fqdn is too short to match this pattern */
1027 if (unanchored == ANCHOR_LEFT)
1032 * Convert this into a fully anchored pattern with
1033 * the fqdn and pattern the same length
1035 fv += (flen - plen); /* flen - plen >= 0 due to check above */
1036 return simple_domaincmp(pv, fv, plen);
1038 else if (unanchored == 0)
1040 /* Fully anchored, check length */
1045 return simple_domaincmp(pv, fv, plen);
1047 else if (unanchored == ANCHOR_RIGHT)
1049 /* Left anchored, ignore all extra in fqdn */
1050 return simple_domaincmp(pv, fv, plen);
1056 int maxn = flen - plen;
1057 for (n = 0; n <= maxn; n++)
1059 if (!simple_domaincmp(pv, fv, plen))
1064 * Doesn't match from start of fqdn
1065 * Try skipping first part of fqdn
1073 #endif /* def FEATURE_EXTENDED_HOST_PATTERNS */
1076 /*********************************************************************
1078 * Function : create_url_spec
1080 * Description : Creates a "url_spec" structure from a string.
1081 * When finished, free with free_url_spec().
1084 * 1 : url = Target url_spec to be filled in. Will be
1085 * zeroed before use.
1086 * 2 : buf = Source pattern, null terminated. NOTE: The
1087 * contents of this buffer are destroyed by this
1088 * function. If this function succeeds, the
1089 * buffer is copied to url->spec. If this
1090 * function fails, the contents of the buffer
1093 * Returns : JB_ERR_OK - Success
1094 * JB_ERR_PARSE - Cannot parse regex (Detailed message
1095 * written to system log)
1097 *********************************************************************/
1098 jb_err create_url_spec(struct url_spec *url, char *buf)
1103 memset(url, '\0', sizeof(*url));
1105 /* Remember the original specification for the CGI pages. */
1106 url->spec = strdup_or_die(buf);
1108 /* Is it a tag pattern? */
1109 if (0 == strncmpic(url->spec, "TAG:", 4))
1111 /* The pattern starts with the first character after "TAG:" */
1112 const char *tag_pattern = buf + 4;
1113 return compile_pattern(tag_pattern, NO_ANCHORING, url, &url->tag_regex);
1116 /* If it isn't a tag pattern it must be an URL pattern. */
1117 return compile_url_pattern(url, buf);
1121 /*********************************************************************
1123 * Function : free_url_spec
1125 * Description : Called from the "unloaders". Freez the url
1126 * structure elements.
1129 * 1 : url = pointer to a url_spec structure.
1133 *********************************************************************/
1134 void free_url_spec(struct url_spec *url)
1136 if (url == NULL) return;
1139 #ifdef FEATURE_EXTENDED_HOST_PATTERNS
1140 if (url->host_regex)
1142 regfree(url->host_regex);
1143 freez(url->host_regex);
1146 freez(url->dbuffer);
1149 #endif /* ndef FEATURE_EXTENDED_HOST_PATTERNS */
1150 freez(url->port_list);
1158 regfree(url->tag_regex);
1159 freez(url->tag_regex);
1164 /*********************************************************************
1166 * Function : port_matches
1168 * Description : Compares a port against a port list.
1171 * 1 : port = The port to check.
1172 * 2 : port_list = The list of port to compare with.
1174 * Returns : TRUE for yes, FALSE otherwise.
1176 *********************************************************************/
1177 static int port_matches(const int port, const char *port_list)
1179 return ((NULL == port_list) || match_portlist(port_list, port));
1183 /*********************************************************************
1185 * Function : host_matches
1187 * Description : Compares a host against a host pattern.
1190 * 1 : http = The request whose host is matched
1191 * 2 : pattern = The URL pattern
1193 * Returns : TRUE for yes, FALSE otherwise.
1195 *********************************************************************/
1196 static int host_matches(const struct http_request *http,
1197 const struct url_spec *pattern)
1199 #ifdef FEATURE_EXTENDED_HOST_PATTERNS
1200 return ((NULL == pattern->host_regex)
1201 || (0 == regexec(pattern->host_regex, http->host, 0, NULL, 0)));
1203 return ((NULL == pattern->dbuffer) || (0 == domain_match(pattern, http)));
1208 /*********************************************************************
1210 * Function : path_matches
1212 * Description : Compares a path against a path pattern.
1215 * 1 : path = The path to match
1216 * 2 : pattern = The URL pattern
1218 * Returns : TRUE for yes, FALSE otherwise.
1220 *********************************************************************/
1221 static int path_matches(const char *path, const struct url_spec *pattern)
1223 return ((NULL == pattern->preg)
1224 || (0 == regexec(pattern->preg, path, 0, NULL, 0)));
1228 /*********************************************************************
1230 * Function : url_match
1232 * Description : Compare a URL against a URL pattern.
1235 * 1 : pattern = a URL pattern
1236 * 2 : http = The parsed request to match
1238 * Returns : Nonzero if the URL matches the pattern, else 0.
1240 *********************************************************************/
1241 int url_match(const struct url_spec *pattern,
1242 const struct http_request *http)
1244 if (pattern->tag_regex != NULL)
1246 /* It's a tag pattern and shouldn't be matched against URLs */
1250 return (port_matches(http->port, pattern->port_list)
1251 && host_matches(http, pattern) && path_matches(http->path, pattern));
1256 /*********************************************************************
1258 * Function : match_portlist
1260 * Description : Check if a given number is covered by a comma
1261 * separated list of numbers and ranges (a,b-c,d,..)
1264 * 1 : portlist = String with list
1265 * 2 : port = port to check
1267 * Returns : 0 => no match
1270 *********************************************************************/
1271 int match_portlist(const char *portlist, int port)
1273 char *min, *max, *next, *portlist_copy;
1275 min = portlist_copy = strdup_or_die(portlist);
1278 * Zero-terminate first item and remember offset for next
1280 if (NULL != (next = strchr(portlist_copy, (int) ',')))
1286 * Loop through all items, checking for match
1290 if (NULL == (max = strchr(min, (int) '-')))
1293 * No dash, check for equality
1295 if (port == atoi(min))
1297 freez(portlist_copy);
1304 * This is a range, so check if between min and max,
1305 * or, if max was omitted, between min and 65535
1308 if (port >= atoi(min) && port <= (atoi(max) ? atoi(max) : 65535))
1310 freez(portlist_copy);
1322 * Zero-terminate next item and remember offset for n+1
1324 if ((NULL != next) && (NULL != (next = strchr(next, (int) ','))))
1330 freez(portlist_copy);
1336 /*********************************************************************
1338 * Function : parse_forwarder_address
1340 * Description : Parse out the host and port from a forwarder address.
1343 * 1 : address = The forwarder address to parse.
1344 * 2 : hostname = Used to return the hostname. NULL on error.
1345 * 3 : port = Used to return the port. Untouched if no port
1348 * Returns : JB_ERR_OK on success
1349 * JB_ERR_MEMORY on out of memory
1350 * JB_ERR_PARSE on malformed address.
1352 *********************************************************************/
1353 jb_err parse_forwarder_address(char *address, char **hostname, int *port)
1357 if ((*address == '[') && (NULL == strchr(address, ']')))
1359 /* XXX: Should do some more validity checks here. */
1360 return JB_ERR_PARSE;
1363 *hostname = strdup_or_die(address);
1365 if ((**hostname == '[') && (NULL != (p = strchr(*hostname, ']'))))
1368 memmove(*hostname, (*hostname + 1), (size_t)(p - *hostname));
1371 *port = (int)strtol(++p, NULL, 0);
1374 else if (NULL != (p = strchr(*hostname, ':')))
1377 *port = (int)strtol(p, NULL, 0);