1 const char urlmatch_rcs[] = "$Id: urlmatch.c,v 1.66 2011/12/31 14:53:18 fabiankeil Exp $";
2 /*********************************************************************
4 * File : $Source: /cvsroot/ijbswa/current/urlmatch.c,v $
6 * Purpose : Declares functions to match URLs against URL
9 * Copyright : Written by and Copyright (C) 2001-2011
10 * the Privoxy team. http://www.privoxy.org/
12 * Based on the Internet Junkbuster originally written
13 * by and Copyright (C) 1997 Anonymous Coders and
14 * Junkbusters Corporation. http://www.junkbusters.com
16 * This program is free software; you can redistribute it
17 * and/or modify it under the terms of the GNU General
18 * Public License as published by the Free Software
19 * Foundation; either version 2 of the License, or (at
20 * your option) any later version.
22 * This program is distributed in the hope that it will
23 * be useful, but WITHOUT ANY WARRANTY; without even the
24 * implied warranty of MERCHANTABILITY or FITNESS FOR A
25 * PARTICULAR PURPOSE. See the GNU General Public
26 * License for more details.
28 * The GNU General Public License should be included with
29 * this file. If not, you can view it at
30 * http://www.gnu.org/copyleft/gpl.html
31 * or write to the Free Software Foundation, Inc., 59
32 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
34 *********************************************************************/
41 #include <sys/types.h>
49 #if !defined(_WIN32) && !defined(__OS2__)
59 const char urlmatch_h_rcs[] = URLMATCH_H_VERSION;
68 static jb_err compile_host_pattern(struct url_spec *url, const char *host_pattern);
70 /*********************************************************************
72 * Function : free_http_request
74 * Description : Freez a http_request structure
77 * 1 : http = points to a http_request structure to free
81 *********************************************************************/
82 void free_http_request(struct http_request *http)
91 freez(http->hostport);
94 freez(http->host_ip_addr_str);
95 #ifndef FEATURE_EXTENDED_HOST_PATTERNS
103 #ifndef FEATURE_EXTENDED_HOST_PATTERNS
104 /*********************************************************************
106 * Function : init_domain_components
108 * Description : Splits the domain name so we can compare it
109 * against wildcards. It used to be part of
110 * parse_http_url, but was separated because the
111 * same code is required in chat in case of
112 * intercepted requests.
115 * 1 : http = pointer to the http structure to hold elements.
117 * Returns : JB_ERR_OK on success
118 * JB_ERR_MEMORY on out of memory
119 * JB_ERR_PARSE on malformed command/URL
120 * or more than SZ(vec) domain components (or none).
122 *********************************************************************/
123 jb_err init_domain_components(struct http_request *http)
125 char *vec[BUFFER_SIZE];
129 http->dbuffer = strdup(http->host);
130 if (NULL == http->dbuffer)
132 return JB_ERR_MEMORY;
135 /* map to lower case */
136 for (p = http->dbuffer; *p ; p++)
138 *p = (char)privoxy_tolower(*p);
141 /* split the domain name into components */
142 http->dcount = ssplit(http->dbuffer, ".", vec, SZ(vec), 1, 1);
144 if (http->dcount <= 0)
147 * Error: More than SZ(vec) components in domain
148 * or: no components in domain
150 log_error(LOG_LEVEL_ERROR, "More than SZ(vec) components in domain or none at all.");
154 /* save a copy of the pointers in dvec */
155 size = (size_t)http->dcount * sizeof(*http->dvec);
157 http->dvec = (char **)malloc(size);
158 if (NULL == http->dvec)
160 return JB_ERR_MEMORY;
163 memcpy(http->dvec, vec, size);
167 #endif /* ndef FEATURE_EXTENDED_HOST_PATTERNS */
170 /*********************************************************************
172 * Function : url_requires_percent_encoding
174 * Description : Checks if an URL contains invalid characters
175 * according to RFC 3986 that should be percent-encoded.
176 * Does not verify whether or not the passed string
177 * actually is a valid URL.
180 * 1 : url = URL to check
182 * Returns : True if the URL contains characters that should be percent-encoded, false otherwise
184 *********************************************************************/
185 int url_requires_percent_encoding(const char *url)
187 static const char allowed_characters[128] = {
188 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
189 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
190 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
191 '\0', '\0', '\0', '!', '\0', '#', '$', '%', '&', '\'',
192 '(', ')', '*', '+', ',', '-', '.', '/', '0', '1',
193 '2', '3', '4', '5', '6', '7', '8', '9', ':', ';',
194 '\0', '=', '\0', '?', '@', 'A', 'B', 'C', 'D', 'E',
195 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
196 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y',
197 'Z', '[', '\0', ']', '\0', '_', '\0', 'a', 'b', 'c',
198 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
199 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
200 'x', 'y', 'z', '\0', '\0', '\0', '~', '\0'
205 const unsigned int i = (unsigned char)*url++;
206 if (i >= sizeof(allowed_characters) || '\0' == allowed_characters[i])
217 /*********************************************************************
219 * Function : parse_http_url
221 * Description : Parse out the host and port from the URL. Find the
222 * hostname & path, port (if ':'), and/or password (if '@')
225 * 1 : url = URL (or is it URI?) to break down
226 * 2 : http = pointer to the http structure to hold elements.
227 * Must be initialized with valid values (like NULLs).
228 * 3 : require_protocol = Whether or not URLs without
229 * protocol are acceptable.
231 * Returns : JB_ERR_OK on success
232 * JB_ERR_MEMORY on out of memory
233 * JB_ERR_PARSE on malformed command/URL
234 * or too many (or no) domain components.
236 *********************************************************************/
237 jb_err parse_http_url(const char *url, struct http_request *http, int require_protocol)
239 int host_available = 1; /* A proxy can dream. */
242 * Save our initial URL
244 http->url = strdup(url);
245 if (http->url == NULL)
247 return JB_ERR_MEMORY;
252 * Check for * URI. If found, we're done.
254 if (*http->url == '*')
256 if ( NULL == (http->path = strdup("*"))
257 || NULL == (http->hostport = strdup("")) )
259 return JB_ERR_MEMORY;
261 if (http->url[1] != '\0')
270 * Split URL into protocol,hostport,path.
280 return JB_ERR_MEMORY;
283 /* Find the start of the URL in our scratch space */
285 if (strncmpic(url_noproto, "http://", 7) == 0)
289 else if (strncmpic(url_noproto, "https://", 8) == 0)
292 * Should only happen when called from cgi_show_url_info().
297 else if (*url_noproto == '/')
300 * Short request line without protocol and host.
301 * Most likely because the client's request
302 * was intercepted and redirected into Privoxy.
307 else if (require_protocol)
313 url_path = strchr(url_noproto, '/');
314 if (url_path != NULL)
319 * NOTE: The following line ignores the path for HTTPS URLS.
320 * This means that you get consistent behaviour if you type a
321 * https URL in and it's parsed by the function. (When the
322 * URL is actually retrieved, SSL hides the path part).
324 http->path = strdup(http->ssl ? "/" : url_path);
326 http->hostport = strdup(url_noproto);
331 * Repair broken HTTP requests that don't contain a path,
332 * or CONNECT requests
334 http->path = strdup("/");
335 http->hostport = strdup(url_noproto);
340 if ( (http->path == NULL)
341 || (http->hostport == NULL))
343 return JB_ERR_MEMORY;
349 /* Without host, there is nothing left to do here */
354 * Split hostport into user/password (ignored), host, port.
361 buf = strdup(http->hostport);
364 return JB_ERR_MEMORY;
367 /* check if url contains username and/or password */
368 host = strchr(buf, '@');
371 /* Contains username/password, skip it and the @ sign. */
376 /* No username or password. */
380 /* Move after hostname before port number */
383 /* Numeric IPv6 address delimited by brackets */
385 port = strchr(host, ']');
389 /* Missing closing bracket */
400 else if (*port != ':')
402 /* Garbage after closing bracket */
409 /* Plain non-escaped hostname */
410 port = strchr(host, ':');
413 /* check if url contains port */
417 /* Terminate hostname and point to start of port string */
419 http->port = atoi(port);
423 /* No port specified. */
424 http->port = (http->ssl ? 443 : 80);
427 http->host = strdup(host);
431 if (http->host == NULL)
433 return JB_ERR_MEMORY;
437 #ifdef FEATURE_EXTENDED_HOST_PATTERNS
440 /* Split domain name so we can compare it against wildcards */
441 return init_domain_components(http);
442 #endif /* def FEATURE_EXTENDED_HOST_PATTERNS */
447 /*********************************************************************
449 * Function : unknown_method
451 * Description : Checks whether a method is unknown.
454 * 1 : method = points to a http method
456 * Returns : TRUE if it's unknown, FALSE otherwise.
458 *********************************************************************/
459 static int unknown_method(const char *method)
461 static const char * const known_http_methods[] = {
462 /* Basic HTTP request type */
463 "GET", "HEAD", "POST", "PUT", "DELETE", "OPTIONS", "TRACE", "CONNECT",
464 /* webDAV extensions (RFC2518) */
465 "PROPFIND", "PROPPATCH", "MOVE", "COPY", "MKCOL", "LOCK", "UNLOCK",
467 * Microsoft webDAV extension for Exchange 2000. See:
468 * http://lists.w3.org/Archives/Public/w3c-dist-auth/2002JanMar/0001.html
469 * http://msdn.microsoft.com/library/en-us/wss/wss/_webdav_methods.asp
471 "BCOPY", "BMOVE", "BDELETE", "BPROPFIND", "BPROPPATCH",
473 * Another Microsoft webDAV extension for Exchange 2000. See:
474 * http://systems.cs.colorado.edu/grunwald/MobileComputing/Papers/draft-cohen-gena-p-base-00.txt
475 * http://lists.w3.org/Archives/Public/w3c-dist-auth/2002JanMar/0001.html
476 * http://msdn.microsoft.com/library/en-us/wss/wss/_webdav_methods.asp
478 "SUBSCRIBE", "UNSUBSCRIBE", "NOTIFY", "POLL",
480 * Yet another WebDAV extension, this time for
481 * Web Distributed Authoring and Versioning (RFC3253)
483 "VERSION-CONTROL", "REPORT", "CHECKOUT", "CHECKIN", "UNCHECKOUT",
484 "MKWORKSPACE", "UPDATE", "LABEL", "MERGE", "BASELINE-CONTROL", "MKACTIVITY",
488 for (i = 0; i < SZ(known_http_methods); i++)
490 if (0 == strcmpic(method, known_http_methods[i]))
501 /*********************************************************************
503 * Function : parse_http_request
505 * Description : Parse an HTTP request line: split it into method,
506 * URL and version, and break the URL down with parse_http_url().
509 * 1 : req = HTTP request line to break down
510 * 2 : http = pointer to the http structure to hold elements
512 * Returns : JB_ERR_OK on success
513 * JB_ERR_MEMORY on out of memory
514 * JB_ERR_CGI_PARAMS on malformed command/URL
515 * or >100 domains deep.
517 *********************************************************************/
518 jb_err parse_http_request(const char *req, struct http_request *http)
521 char *v[10]; /* XXX: Why 10? We should only need three. */
525 memset(http, '\0', sizeof(*http));
530 return JB_ERR_MEMORY;
533 n = ssplit(buf, " \r\n", v, SZ(v), 1, 1);
541 * Fail in case of unknown methods
542 * which we might not handle correctly.
544 * XXX: There should be a config option
545 * to forward requests with unknown methods
546 * anyway. Most of them don't need special
549 if (unknown_method(v[0]))
551 log_error(LOG_LEVEL_ERROR, "Unknown HTTP method detected: %s", v[0]);
556 if (strcmpic(v[2], "HTTP/1.1") && strcmpic(v[2], "HTTP/1.0"))
558 log_error(LOG_LEVEL_ERROR, "The only supported HTTP "
559 "versions are 1.0 and 1.1. This rules out: %s", v[2]);
564 http->ssl = !strcmpic(v[0], "CONNECT");
566 err = parse_http_url(v[1], http, !http->ssl);
574 * Copy the details into the structure
576 http->cmd = strdup(req);
577 http->gpc = strdup(v[0]);
578 http->ver = strdup(v[2]);
582 if ( (http->cmd == NULL)
583 || (http->gpc == NULL)
584 || (http->ver == NULL) )
586 return JB_ERR_MEMORY;
594 /*********************************************************************
596 * Function : compile_pattern
598 * Description : Compiles a host, domain or TAG pattern.
601 * 1 : pattern = The pattern to compile.
602 * 2 : anchoring = How the regex should be modified
603 * before compilation. Can be either
604 * one of NO_ANCHORING, LEFT_ANCHORED,
605 * RIGHT_ANCHORED or RIGHT_ANCHORED_HOST.
606 * 3 : url = In case of failures, the spec member is
607 * logged and the structure freed.
608 * 4 : regex = Where the compiled regex should be stored.
610 * Returns : JB_ERR_OK - Success
611 * JB_ERR_MEMORY - Out of memory
612 * JB_ERR_PARSE - Cannot parse regex
614 *********************************************************************/
615 static jb_err compile_pattern(const char *pattern, enum regex_anchoring anchoring,
616 struct url_spec *url, regex_t **regex)
619 char rebuf[BUFFER_SIZE];
620 const char *fmt = NULL;
623 assert(strlen(pattern) < sizeof(rebuf) - 2);
625 if (pattern[0] == '\0')
639 case RIGHT_ANCHORED_HOST:
646 log_error(LOG_LEVEL_FATAL,
647 "Invalid anchoring in compile_pattern %d", anchoring);
650 *regex = zalloc(sizeof(**regex));
654 return JB_ERR_MEMORY;
657 snprintf(rebuf, sizeof(rebuf), fmt, pattern);
659 errcode = regcomp(*regex, rebuf, (REG_EXTENDED|REG_NOSUB|REG_ICASE));
663 size_t errlen = regerror(errcode, *regex, rebuf, sizeof(rebuf));
664 if (errlen > (sizeof(rebuf) - (size_t)1))
666 errlen = sizeof(rebuf) - (size_t)1;
668 rebuf[errlen] = '\0';
669 log_error(LOG_LEVEL_ERROR, "error compiling %s from %s: %s",
670 pattern, url->spec, rebuf);
681 /*********************************************************************
683 * Function : compile_url_pattern
685 * Description : Compiles the three parts of an URL pattern.
688 * 1 : url = Target url_spec to be filled in.
689 * 2 : buf = The url pattern to compile. Will be messed up.
691 * Returns : JB_ERR_OK - Success
692 * JB_ERR_MEMORY - Out of memory
693 * JB_ERR_PARSE - Cannot parse regex
695 *********************************************************************/
696 static jb_err compile_url_pattern(struct url_spec *url, char *buf)
700 p = strchr(buf, '/');
704 * Only compile the regex if it consists of more than
705 * a single slash, otherwise it wouldn't affect the result.
710 * XXX: does it make sense to compile the slash at the beginning?
712 jb_err err = compile_pattern(p, LEFT_ANCHORED, url, &url->preg);
714 if (JB_ERR_OK != err)
723 * IPv6 numeric hostnames can contain colons, thus we need
724 * to delimit the hostname before the real port separator.
725 * As brackets are already used in the hostname pattern,
726 * we use angle brackets ('<', '>') instead.
728 if ((buf[0] == '<') && (NULL != (p = strchr(buf + 1, '>'))))
735 /* IPv6 address without port number */
740 /* Garbage after address delimiter */
746 p = strchr(buf, ':');
752 url->port_list = strdup(p);
753 if (NULL == url->port_list)
755 return JB_ERR_MEMORY;
760 url->port_list = NULL;
765 return compile_host_pattern(url, buf);
773 #ifdef FEATURE_EXTENDED_HOST_PATTERNS
774 /*********************************************************************
776 * Function : compile_host_pattern
778 * Description : Parses and compiles a host pattern.
781 * 1 : url = Target url_spec to be filled in.
782 * 2 : host_pattern = Host pattern to compile.
784 * Returns : JB_ERR_OK - Success
785 * JB_ERR_MEMORY - Out of memory
786 * JB_ERR_PARSE - Cannot parse regex
788 *********************************************************************/
789 static jb_err compile_host_pattern(struct url_spec *url, const char *host_pattern)
791 return compile_pattern(host_pattern, RIGHT_ANCHORED_HOST, url, &url->host_regex);
796 /*********************************************************************
798 * Function : compile_host_pattern
800 * Description : Parses and "compiles" an old-school host pattern.
803 * 1 : url = Target url_spec to be filled in.
804 * 2 : host_pattern = Host pattern to parse.
806 * Returns : JB_ERR_OK - Success
807 * JB_ERR_MEMORY - Out of memory
808 * JB_ERR_PARSE - Cannot parse regex
810 *********************************************************************/
811 static jb_err compile_host_pattern(struct url_spec *url, const char *host_pattern)
820 if (host_pattern[strlen(host_pattern) - 1] == '.')
822 url->unanchored |= ANCHOR_RIGHT;
824 if (host_pattern[0] == '.')
826 url->unanchored |= ANCHOR_LEFT;
830 * Split domain into components
832 url->dbuffer = strdup(host_pattern);
833 if (NULL == url->dbuffer)
836 return JB_ERR_MEMORY;
842 for (p = url->dbuffer; *p ; p++)
844 *p = (char)privoxy_tolower(*p);
848 * Split the domain name into components
850 url->dcount = ssplit(url->dbuffer, ".", v, SZ(v), 1, 1);
855 return JB_ERR_MEMORY;
857 else if (url->dcount != 0)
860 * Save a copy of the pointers in dvec
862 size = (size_t)url->dcount * sizeof(*url->dvec);
864 url->dvec = (char **)malloc(size);
865 if (NULL == url->dvec)
868 return JB_ERR_MEMORY;
871 memcpy(url->dvec, v, size);
874 * else dcount == 0 in which case we needn't do anything,
875 * since dvec will never be accessed and the pattern will
882 /*********************************************************************
884 * Function : simplematch
886 * Description : String matching, with a (greedy) '*' wildcard that
887 * stands for zero or more arbitrary characters and
888 * character classes in [], which take both enumerations
892 * 1 : pattern = pattern for matching
893 * 2 : text = text to be matched
895 * Returns : 0 if match, else nonzero
897 *********************************************************************/
898 static int simplematch(const char *pattern, const char *text)
900 const unsigned char *pat = (const unsigned char *)pattern;
901 const unsigned char *txt = (const unsigned char *)text;
902 const unsigned char *fallback = pat;
905 unsigned char lastchar = 'a';
907 unsigned char charmap[32];
912 /* EOF pattern but !EOF text? */
925 /* '*' in the pattern? */
929 /* The pattern ends afterwards? Speed up the return. */
935 /* Else, set wildcard mode and remember position after '*' */
940 /* Character range specification? */
943 memset(charmap, '\0', sizeof(charmap));
945 while (*++pat != ']')
951 else if (*pat == '-')
953 if ((*++pat == ']') || *pat == '\0')
957 for (i = lastchar; i <= *pat; i++)
959 charmap[i / 8] |= (unsigned char)(1 << (i % 8));
964 charmap[*pat / 8] |= (unsigned char)(1 << (*pat % 8));
968 } /* -END- if Character range specification */
972 * Char match, or char range match?
976 || ((*pat == ']') && (charmap[*txt / 8] & (1 << (*txt % 8)))) )
986 * No match && no wildcard: No luck
990 else if (pat != fallback)
993 * Increment text pointer if in char range matching
1000 * Wildcard mode && nonmatch beyond fallback: Rewind pattern
1004 * Restart matching from current text pointer
1011 /* Cut off extra '*'s */
1012 if(*pat == '*') pat++;
1014 /* If this is the pattern's end, fine! */
1020 /*********************************************************************
1022 * Function : simple_domaincmp
1024 * Description : Domain-wise Compare fqdn's. The comparison is
1025 * both left- and right-anchored. The individual
1026 * domain names are compared with simplematch().
1027 * This is only used by domain_match.
1030 * 1 : pv = array of patterns to compare
1031 * 2 : fv = array of domain components to compare
1032 * 3 : len = length of the arrays (both arrays are the
1033 * same length - if they weren't, it couldn't
1034 * possibly be a match).
1036 * Returns : 0 => domains are equivalent, else no match.
1038 *********************************************************************/
1039 static int simple_domaincmp(char **pv, char **fv, int len)
1043 for (n = 0; n < len; n++)
1045 if (simplematch(pv[n], fv[n]))
1056 /*********************************************************************
1058 * Function : domain_match
1060 * Description : Domain-wise Compare fqdn's. Governed by the bitmap in
1061 * pattern->unanchored, the comparison is un-, left-,
1062 * right-anchored, or both.
1063 * The individual domain names are compared with
1067 * 1 : pattern = a domain that may contain a '*' as a wildcard.
1068 * 2 : fqdn = domain name against which the patterns are compared.
1070 * Returns : 0 => domains are equivalent, else no match.
1072 *********************************************************************/
1073 static int domain_match(const struct url_spec *pattern, const struct http_request *fqdn)
1075 char **pv, **fv; /* vectors */
1077 int unanchored = pattern->unanchored & (ANCHOR_RIGHT | ANCHOR_LEFT);
1079 plen = pattern->dcount;
1080 flen = fqdn->dcount;
1084 /* fqdn is too short to match this pattern */
1091 if (unanchored == ANCHOR_LEFT)
1096 * Convert this into a fully anchored pattern with
1097 * the fqdn and pattern the same length
1099 fv += (flen - plen); /* flen - plen >= 0 due to check above */
1100 return simple_domaincmp(pv, fv, plen);
1102 else if (unanchored == 0)
1104 /* Fully anchored, check length */
1109 return simple_domaincmp(pv, fv, plen);
1111 else if (unanchored == ANCHOR_RIGHT)
1113 /* Left anchored, ignore all extra in fqdn */
1114 return simple_domaincmp(pv, fv, plen);
1120 int maxn = flen - plen;
1121 for (n = 0; n <= maxn; n++)
1123 if (!simple_domaincmp(pv, fv, plen))
1128 * Doesn't match from start of fqdn
1129 * Try skipping first part of fqdn
1137 #endif /* def FEATURE_EXTENDED_HOST_PATTERNS */
1140 /*********************************************************************
1142 * Function : create_url_spec
1144 * Description : Creates a "url_spec" structure from a string.
1145 * When finished, free with free_url_spec().
1148 * 1 : url = Target url_spec to be filled in. Will be
1149 * zeroed before use.
1150 * 2 : buf = Source pattern, null terminated. NOTE: The
1151 * contents of this buffer are destroyed by this
1152 * function. If this function succeeds, the
1153 * buffer is copied to url->spec. If this
1154 * function fails, the contents of the buffer
1157 * Returns : JB_ERR_OK - Success
1158 * JB_ERR_MEMORY - Out of memory
1159 * JB_ERR_PARSE - Cannot parse regex (Detailed message
1160 * written to system log)
1162 *********************************************************************/
1163 jb_err create_url_spec(struct url_spec *url, char *buf)
1168 memset(url, '\0', sizeof(*url));
1170 /* Remember the original specification for the CGI pages. */
1171 url->spec = strdup(buf);
1172 if (NULL == url->spec)
1174 return JB_ERR_MEMORY;
1177 /* Is it a tag pattern? */
1178 if (0 == strncmpic(url->spec, "TAG:", 4))
1180 /* The pattern starts with the first character after "TAG:" */
1181 const char *tag_pattern = buf + 4;
1182 return compile_pattern(tag_pattern, NO_ANCHORING, url, &url->tag_regex);
1185 /* If it isn't a tag pattern it must be an URL pattern. */
1186 return compile_url_pattern(url, buf);
1190 /*********************************************************************
1192 * Function : free_url_spec
1194 * Description : Called from the "unloaders". Freez the url
1195 * structure elements.
1198 * 1 : url = pointer to a url_spec structure.
1202 *********************************************************************/
1203 void free_url_spec(struct url_spec *url)
1205 if (url == NULL) return;
1208 #ifdef FEATURE_EXTENDED_HOST_PATTERNS
1209 if (url->host_regex)
1211 regfree(url->host_regex);
1212 freez(url->host_regex);
1215 freez(url->dbuffer);
1218 #endif /* ndef FEATURE_EXTENDED_HOST_PATTERNS */
1219 freez(url->port_list);
1227 regfree(url->tag_regex);
1228 freez(url->tag_regex);
1233 /*********************************************************************
1235 * Function : port_matches
1237 * Description : Compares a port against a port list.
1240 * 1 : port = The port to check.
1241 * 2 : port_list = The list of ports to compare with.
1243 * Returns : TRUE for yes, FALSE otherwise.
1245 *********************************************************************/
1246 static int port_matches(const int port, const char *port_list)
1248 return ((NULL == port_list) || match_portlist(port_list, port));
1252 /*********************************************************************
1254 * Function : host_matches
1256 * Description : Compares a host against a host pattern.
1259 * 1 : http = The request whose host is matched
1260 * 2 : pattern = The URL pattern
1262 * Returns : TRUE for yes, FALSE otherwise.
1264 *********************************************************************/
1265 static int host_matches(const struct http_request *http,
1266 const struct url_spec *pattern)
1268 #ifdef FEATURE_EXTENDED_HOST_PATTERNS
1269 return ((NULL == pattern->host_regex)
1270 || (0 == regexec(pattern->host_regex, http->host, 0, NULL, 0)));
1272 return ((NULL == pattern->dbuffer) || (0 == domain_match(pattern, http)));
1277 /*********************************************************************
1279 * Function : path_matches
1281 * Description : Compares a path against a path pattern.
1284 * 1 : path = The path to match
1285 * 2 : pattern = The URL pattern
1287 * Returns : TRUE for yes, FALSE otherwise.
1289 *********************************************************************/
1290 static int path_matches(const char *path, const struct url_spec *pattern)
1292 return ((NULL == pattern->preg)
1293 || (0 == regexec(pattern->preg, path, 0, NULL, 0)));
1297 /*********************************************************************
1299 * Function : url_match
1301 * Description : Compare a URL against a URL pattern.
1304 * 1 : pattern = a URL pattern
1305 * 2 : http = The request (port, host and path) to match
1307 * Returns : Nonzero if the URL matches the pattern, else 0.
1309 *********************************************************************/
1310 int url_match(const struct url_spec *pattern,
1311 const struct http_request *http)
1313 if (pattern->tag_regex != NULL)
1315 /* It's a tag pattern and shouldn't be matched against URLs */
1319 return (port_matches(http->port, pattern->port_list)
1320 && host_matches(http, pattern) && path_matches(http->path, pattern));
1325 /*********************************************************************
1327 * Function : match_portlist
1329 * Description : Check if a given number is covered by a comma
1330 * separated list of numbers and ranges (a,b-c,d,..)
1333 * 1 : portlist = String with list
1334 * 2 : port = port to check
1336 * Returns : 0 => no match
1339 *********************************************************************/
1340 int match_portlist(const char *portlist, int port)
1342 char *min, *max, *next, *portlist_copy;
1344 min = portlist_copy = strdup(portlist);
1347 * Zero-terminate first item and remember offset for next
1349 if (NULL != (next = strchr(portlist_copy, (int) ',')))
1355 * Loop through all items, checking for match
1359 if (NULL == (max = strchr(min, (int) '-')))
1362 * No dash, check for equality
1364 if (port == atoi(min))
1366 freez(portlist_copy);
1373 * This is a range, so check if between min and max,
1374 * or, if max was omitted, between min and 65535
1377 if(port >= atoi(min) && port <= (atoi(max) ? atoi(max) : 65535))
1379 freez(portlist_copy);
1391 * Zero-terminate next item and remember offset for n+1
1393 if ((NULL != next) && (NULL != (next = strchr(next, (int) ','))))
1399 freez(portlist_copy);
1405 /*********************************************************************
1407 * Function : parse_forwarder_address
1409 * Description : Parse out the host and port from a forwarder address.
1412 * 1 : address = The forwarder address to parse.
1413 * 2 : hostname = Used to return the hostname. NULL on out of memory, untouched on parse error.
1414 * 3 : port = Used to return the port. Untouched if no port
1417 * Returns : JB_ERR_OK on success
1418 * JB_ERR_MEMORY on out of memory
1419 * JB_ERR_PARSE on malformed address.
1421 *********************************************************************/
1422 jb_err parse_forwarder_address(char *address, char **hostname, int *port)
1426 if ((*address == '[') && (NULL == strchr(address, ']')))
1428 /* XXX: Should do some more validity checks here. */
1429 return JB_ERR_PARSE;
1432 *hostname = strdup(address);
1433 if (NULL == *hostname)
1435 return JB_ERR_MEMORY;
1438 if ((**hostname == '[') && (NULL != (p = strchr(*hostname, ']'))))
1441 memmove(*hostname, (*hostname + 1), (size_t)(p - *hostname));
1444 *port = (int)strtol(++p, NULL, 0);
1447 else if (NULL != (p = strchr(*hostname, ':')))
1450 *port = (int)strtol(p, NULL, 0);