1 const char urlmatch_rcs[] = "$Id: urlmatch.c,v 1.71 2012/06/08 15:15:11 fabiankeil Exp $";
2 /*********************************************************************
4 * File : $Source: /cvsroot/ijbswa/current/urlmatch.c,v $
6 * Purpose : Declares functions to match URLs against URL
9 * Copyright : Written by and Copyright (C) 2001-2011
10 * the Privoxy team. http://www.privoxy.org/
12 * Based on the Internet Junkbuster originally written
13 * by and Copyright (C) 1997 Anonymous Coders and
14 * Junkbusters Corporation. http://www.junkbusters.com
16 * This program is free software; you can redistribute it
17 * and/or modify it under the terms of the GNU General
18 * Public License as published by the Free Software
19 * Foundation; either version 2 of the License, or (at
20 * your option) any later version.
22 * This program is distributed in the hope that it will
23 * be useful, but WITHOUT ANY WARRANTY; without even the
24 * implied warranty of MERCHANTABILITY or FITNESS FOR A
25 * PARTICULAR PURPOSE. See the GNU General Public
26 * License for more details.
28 * The GNU General Public License should be included with
29 * this file. If not, you can view it at
30 * http://www.gnu.org/copyleft/gpl.html
31 * or write to the Free Software Foundation, Inc., 59
32 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
34 *********************************************************************/
41 #include <sys/types.h>
49 #if !defined(_WIN32) && !defined(__OS2__)
59 const char urlmatch_h_rcs[] = URLMATCH_H_VERSION;
68 static jb_err compile_host_pattern(struct url_spec *url, const char *host_pattern);
70 /*********************************************************************
72 * Function : free_http_request
74 * Description : Freez a http_request structure
77 * 1 : http = points to a http_request structure to free
81 *********************************************************************/
82 void free_http_request(struct http_request *http)
91 freez(http->hostport);
94 freez(http->host_ip_addr_str);
95 #ifndef FEATURE_EXTENDED_HOST_PATTERNS
103 #ifndef FEATURE_EXTENDED_HOST_PATTERNS
104 /*********************************************************************
106 * Function : init_domain_components
108 * Description : Splits the domain name so we can compare it
109 * against wildcards. It used to be part of
110 * parse_http_url, but was separated because the
111 * same code is required in chat in case of
112 * intercepted requests.
115 * 1 : http = pointer to the http structure to hold elements.
117 * Returns : JB_ERR_OK on success
118 * JB_ERR_MEMORY on out of memory
119 * JB_ERR_PARSE on malformed command/URL
120 * or >100 domains deep.
122 *********************************************************************/
123 jb_err init_domain_components(struct http_request *http)
125 char *vec[BUFFER_SIZE];
129 http->dbuffer = strdup(http->host);
130 if (NULL == http->dbuffer)
132 return JB_ERR_MEMORY;
135 /* map to lower case */
136 for (p = http->dbuffer; *p ; p++)
138 *p = (char)privoxy_tolower(*p);
141 /* split the domain name into components */
142 http->dcount = ssplit(http->dbuffer, ".", vec, SZ(vec));
144 if (http->dcount <= 0)
147 * Error: More than SZ(vec) components in domain
148 * or: no components in domain
150 log_error(LOG_LEVEL_ERROR, "More than SZ(vec) components in domain or none at all.");
154 /* save a copy of the pointers in dvec */
155 size = (size_t)http->dcount * sizeof(*http->dvec);
157 http->dvec = malloc_or_die(size);
159 memcpy(http->dvec, vec, size);
163 #endif /* ndef FEATURE_EXTENDED_HOST_PATTERNS */
166 /*********************************************************************
168 * Function : url_requires_percent_encoding
170 * Description : Checks if an URL contains invalid characters
171 * according to RFC 3986 that should be percent-encoded.
172 * Does not verify whether or not the passed string
173 * actually is a valid URL.
176 * 1 : url = URL to check
178 * Returns : True if the URL contains characters that should be percent-encoded, false otherwise
180 *********************************************************************/
/*
 * allowed_characters is indexed by byte value: an entry of '\0' marks a
 * byte that is not accepted as-is, any other entry is the character
 * itself. Each byte of url is read as unsigned char, so bytes >= 128 lie
 * outside the 128-entry table and are caught by the size check before the
 * table is indexed.
 * NOTE(review): the loop and the return statements are not visible in
 * this excerpt; presumably a hit on the condition below means the URL
 * needs percent-encoding - confirm against the full source.
 */
181 int url_requires_percent_encoding(const char *url)
183 static const char allowed_characters[128] = {
184 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
185 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
186 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
187 '\0', '\0', '\0', '!', '\0', '#', '$', '%', '&', '\'',
188 '(', ')', '*', '+', ',', '-', '.', '/', '0', '1',
189 '2', '3', '4', '5', '6', '7', '8', '9', ':', ';',
190 '\0', '=', '\0', '?', '@', 'A', 'B', 'C', 'D', 'E',
191 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
192 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y',
193 'Z', '[', '\0', ']', '\0', '_', '\0', 'a', 'b', 'c',
194 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
195 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
196 'x', 'y', 'z', '\0', '\0', '\0', '~', '\0'
201 const unsigned int i = (unsigned char)*url++;
202 if (i >= sizeof(allowed_characters) || '\0' == allowed_characters[i])
213 /*********************************************************************
215 * Function : parse_http_url
217 * Description : Parse out the host and port from the URL. Find the
218 * hostname & path, port (if ':'), and/or password (if '@')
221 * 1 : url = URL (or is it URI?) to break down
222 * 2 : http = pointer to the http structure to hold elements.
223 * Must be initialized with valid values (like NULLs).
224 * 3 : require_protocol = Whether or not URLs without
225 * protocol are acceptable.
227 * Returns : JB_ERR_OK on success
228 * JB_ERR_MEMORY on out of memory
229 * JB_ERR_PARSE on malformed command/URL
230 * or >100 domains deep.
232 *********************************************************************/
/*
 * Review notes (based only on the lines visible in this excerpt):
 *  - The URL is first duplicated into http->url; http->path and
 *    http->hostport are separate strdup() copies and both are checked
 *    for NULL before use.
 *  - When http->ssl is set the stored path is always "/", whatever path
 *    the input URL carried.
 *  - Without an explicit port the default is 443 when http->ssl is set
 *    and 80 otherwise.
 *  - NOTE(review): an explicit port is converted with atoi(), which
 *    cannot report errors, so non-numeric or out-of-range port text is
 *    not rejected on the lines shown here - confirm whether it is
 *    validated elsewhere.
 */
233 jb_err parse_http_url(const char *url, struct http_request *http, int require_protocol)
235 int host_available = 1; /* A proxy can dream. */
238 * Save our initial URL
240 http->url = strdup(url);
241 if (http->url == NULL)
243 return JB_ERR_MEMORY;
248 * Check for * URI. If found, we're done.
250 if (*http->url == '*')
252 if (NULL == (http->path = strdup("*"))
253 || NULL == (http->hostport = strdup("")))
255 return JB_ERR_MEMORY;
257 if (http->url[1] != '\0')
266 * Split URL into protocol,hostport,path.
276 return JB_ERR_MEMORY;
279 /* Find the start of the URL in our scratch space */
281 if (strncmpic(url_noproto, "http://", 7) == 0)
285 else if (strncmpic(url_noproto, "https://", 8) == 0)
288 * Should only happen when called from cgi_show_url_info().
293 else if (*url_noproto == '/')
296 * Short request line without protocol and host.
297 * Most likely because the client's request
298 * was intercepted and redirected into Privoxy.
303 else if (require_protocol)
309 url_path = strchr(url_noproto, '/');
310 if (url_path != NULL)
315 * NOTE: The following line ignores the path for HTTPS URLS.
316 * This means that you get consistent behaviour if you type a
317 * https URL in and it's parsed by the function. (When the
318 * URL is actually retrieved, SSL hides the path part).
320 http->path = strdup(http->ssl ? "/" : url_path);
322 http->hostport = strdup(url_noproto);
327 * Repair broken HTTP requests that don't contain a path,
328 * or CONNECT requests
330 http->path = strdup("/");
331 http->hostport = strdup(url_noproto);
336 if ((http->path == NULL)
337 || (http->hostport == NULL))
339 return JB_ERR_MEMORY;
345 /* Without host, there is nothing left to do here */
350 * Split hostport into user/password (ignored), host, port.
357 buf = strdup(http->hostport);
360 return JB_ERR_MEMORY;
363 /* check if url contains username and/or password */
364 host = strchr(buf, '@');
367 /* Contains username/password, skip it and the @ sign. */
372 /* No username or password. */
376 /* Move after hostname before port number */
379 /* Numeric IPv6 address delimited by brackets */
381 port = strchr(host, ']');
385 /* Missing closing bracket */
396 else if (*port != ':')
398 /* Garbage after closing bracket */
405 /* Plain non-escaped hostname */
406 port = strchr(host, ':');
409 /* check if url contains port */
413 /* Terminate hostname and point to start of port string */
415 http->port = atoi(port);
419 /* No port specified. */
420 http->port = (http->ssl ? 443 : 80);
423 http->host = strdup(host);
427 if (http->host == NULL)
429 return JB_ERR_MEMORY;
433 #ifdef FEATURE_EXTENDED_HOST_PATTERNS
436 /* Split domain name so we can compare it against wildcards */
437 return init_domain_components(http);
438 #endif /* def FEATURE_EXTENDED_HOST_PATTERNS */
443 /*********************************************************************
445 * Function : unknown_method
447 * Description : Checks whether a method is unknown.
450 * 1 : method = points to a http method
452 * Returns : TRUE if it's unknown, FALSE otherwise.
454 *********************************************************************/
455 static int unknown_method(const char *method)
457 static const char * const known_http_methods[] = {
458 /* Basic HTTP request type */
459 "GET", "HEAD", "POST", "PUT", "DELETE", "OPTIONS", "TRACE", "CONNECT",
460 /* webDAV extensions (RFC2518) */
461 "PROPFIND", "PROPPATCH", "MOVE", "COPY", "MKCOL", "LOCK", "UNLOCK",
463 * Microsoft webDAV extension for Exchange 2000. See:
464 * http://lists.w3.org/Archives/Public/w3c-dist-auth/2002JanMar/0001.html
465 * http://msdn.microsoft.com/library/en-us/wss/wss/_webdav_methods.asp
467 "BCOPY", "BMOVE", "BDELETE", "BPROPFIND", "BPROPPATCH",
469 * Another Microsoft webDAV extension for Exchange 2000. See:
470 * http://systems.cs.colorado.edu/grunwald/MobileComputing/Papers/draft-cohen-gena-p-base-00.txt
471 * http://lists.w3.org/Archives/Public/w3c-dist-auth/2002JanMar/0001.html
472 * http://msdn.microsoft.com/library/en-us/wss/wss/_webdav_methods.asp
474 "SUBSCRIBE", "UNSUBSCRIBE", "NOTIFY", "POLL",
476 * Yet another WebDAV extension, this time for
477 * Web Distributed Authoring and Versioning (RFC3253)
479 "VERSION-CONTROL", "REPORT", "CHECKOUT", "CHECKIN", "UNCHECKOUT",
480 "MKWORKSPACE", "UPDATE", "LABEL", "MERGE", "BASELINE-CONTROL", "MKACTIVITY",
484 for (i = 0; i < SZ(known_http_methods); i++)
486 if (0 == strcmpic(method, known_http_methods[i]))
497 /*********************************************************************
499 * Function : parse_http_request
501 * Description : Split a request line into method, URL and HTTP version, then
502 * parse the URL (host, path, port and optional user/password) via parse_http_url().
505 * 1 : req = HTTP request line to break down
506 * 2 : http = pointer to the http structure to hold elements
508 * Returns : JB_ERR_OK on success
509 * JB_ERR_MEMORY on out of memory
510 * JB_ERR_CGI_PARAMS on malformed command/URL
511 * or >100 domains deep.
513 *********************************************************************/
514 jb_err parse_http_request(const char *req, struct http_request *http)
517 char *v[10]; /* XXX: Why 10? We should only need three. */
521 memset(http, '\0', sizeof(*http));
526 return JB_ERR_MEMORY;
529 n = ssplit(buf, " \r\n", v, SZ(v));
537 * Fail in case of unknown methods
538 * which we might not handle correctly.
540 * XXX: There should be a config option
541 * to forward requests with unknown methods
542 * anyway. Most of them don't need special
545 if (unknown_method(v[0]))
547 log_error(LOG_LEVEL_ERROR, "Unknown HTTP method detected: %s", v[0]);
552 if (strcmpic(v[2], "HTTP/1.1") && strcmpic(v[2], "HTTP/1.0"))
554 log_error(LOG_LEVEL_ERROR, "The only supported HTTP "
555 "versions are 1.0 and 1.1. This rules out: %s", v[2]);
560 http->ssl = !strcmpic(v[0], "CONNECT");
562 err = parse_http_url(v[1], http, !http->ssl);
570 * Copy the details into the structure
572 http->cmd = strdup(req);
573 http->gpc = strdup(v[0]);
574 http->ver = strdup(v[2]);
578 if ( (http->cmd == NULL)
579 || (http->gpc == NULL)
580 || (http->ver == NULL))
582 return JB_ERR_MEMORY;
590 /*********************************************************************
592 * Function : compile_pattern
594 * Description : Compiles a host, domain or TAG pattern.
597 * 1 : pattern = The pattern to compile.
598 * 2 : anchoring = How the regex should be modified
599 * before compilation. Can be either
600 * one of NO_ANCHORING, LEFT_ANCHORED,
601 * RIGHT_ANCHORED or RIGHT_ANCHORED_HOST.
602 * 3 : url = In case of failures, the spec member is
603 * logged and the structure freed.
604 * 4 : regex = Where the compiled regex should be stored.
606 * Returns : JB_ERR_OK - Success
607 * JB_ERR_MEMORY - Out of memory
608 * JB_ERR_PARSE - Cannot parse regex
610 *********************************************************************/
611 static jb_err compile_pattern(const char *pattern, enum regex_anchoring anchoring,
612 struct url_spec *url, regex_t **regex)
/* rebuf first holds the regex text built from fmt and pattern, and is
 * later reused for the regerror() message. */
615 char rebuf[BUFFER_SIZE];
616 const char *fmt = NULL;
/* NOTE(review): the pattern length limit is enforced only by assert().
 * With asserts compiled out, snprintf() below still bounds the write to
 * sizeof(rebuf), so an over-long pattern would be truncated rather than
 * overflow the buffer - confirm that silent truncation is acceptable. */
619 assert(strlen(pattern) < sizeof(rebuf) - 2);
621 if (pattern[0] == '\0')
635 case RIGHT_ANCHORED_HOST:
642 log_error(LOG_LEVEL_FATAL,
643 "Invalid anchoring in compile_pattern %d", anchoring);
646 *regex = zalloc(sizeof(**regex));
650 return JB_ERR_MEMORY;
/* fmt is used as a printf-style template with pattern as its only
 * argument; presumably it is selected per anchoring mode in the switch
 * whose assignments are not visible in this excerpt. */
653 snprintf(rebuf, sizeof(rebuf), fmt, pattern);
/* POSIX extended syntax, case-insensitive, no sub-match reporting. */
655 errcode = regcomp(*regex, rebuf, (REG_EXTENDED|REG_NOSUB|REG_ICASE));
/* regerror() returns the size needed for the complete message, which
 * can exceed sizeof(rebuf); clamp it before writing the terminator. */
659 size_t errlen = regerror(errcode, *regex, rebuf, sizeof(rebuf));
660 if (errlen > (sizeof(rebuf) - (size_t)1))
662 errlen = sizeof(rebuf) - (size_t)1;
664 rebuf[errlen] = '\0';
665 log_error(LOG_LEVEL_ERROR, "error compiling %s from %s: %s",
666 pattern, url->spec, rebuf);
677 /*********************************************************************
679 * Function : compile_url_pattern
681 * Description : Compiles the three parts of an URL pattern.
684 * 1 : url = Target url_spec to be filled in.
685 * 2 : buf = The url pattern to compile. Will be messed up.
687 * Returns : JB_ERR_OK - Success
688 * JB_ERR_MEMORY - Out of memory
689 * JB_ERR_PARSE - Cannot parse regex
691 *********************************************************************/
692 static jb_err compile_url_pattern(struct url_spec *url, char *buf)
696 p = strchr(buf, '/');
700 * Only compile the regex if it consists of more than
701 * a single slash, otherwise it wouldn't affect the result.
706 * XXX: does it make sense to compile the slash at the beginning?
708 jb_err err = compile_pattern(p, LEFT_ANCHORED, url, &url->preg);
710 if (JB_ERR_OK != err)
719 * IPv6 numeric hostnames can contain colons, thus we need
720 * to delimit the hostname before the real port separator.
721 * As brackets are already used in the hostname pattern,
722 * we use angle brackets ('<', '>') instead.
724 if ((buf[0] == '<') && (NULL != (p = strchr(buf + 1, '>'))))
731 /* IPv6 address without port number */
736 /* Garbage after address delimiter */
742 p = strchr(buf, ':');
748 url->port_list = strdup(p);
749 if (NULL == url->port_list)
751 return JB_ERR_MEMORY;
756 url->port_list = NULL;
761 return compile_host_pattern(url, buf);
769 #ifdef FEATURE_EXTENDED_HOST_PATTERNS
770 /*********************************************************************
772 * Function : compile_host_pattern
774 * Description : Parses and compiles a host pattern.
777 * 1 : url = Target url_spec to be filled in.
778 * 2 : host_pattern = Host pattern to compile.
780 * Returns : JB_ERR_OK - Success
781 * JB_ERR_MEMORY - Out of memory
782 * JB_ERR_PARSE - Cannot parse regex
784 *********************************************************************/
785 static jb_err compile_host_pattern(struct url_spec *url, const char *host_pattern)
787 return compile_pattern(host_pattern, RIGHT_ANCHORED_HOST, url, &url->host_regex);
792 /*********************************************************************
794 * Function : compile_host_pattern
796 * Description : Parses and "compiles" an old-school host pattern.
799 * 1 : url = Target url_spec to be filled in.
800 * 2 : host_pattern = Host pattern to parse.
802 * Returns : JB_ERR_OK - Success
803 * JB_ERR_MEMORY - Out of memory
804 * JB_ERR_PARSE - Cannot parse regex
806 *********************************************************************/
807 static jb_err compile_host_pattern(struct url_spec *url, const char *host_pattern)
/* A trailing '.' marks the pattern as unanchored on the right.
 * NOTE(review): for an empty string strlen(host_pattern) - 1 wraps
 * around and the index is out of bounds - this assumes callers never
 * pass "" (TODO confirm against the call site). */
816 if (host_pattern[strlen(host_pattern) - 1] == '.')
818 url->unanchored |= ANCHOR_RIGHT;
/* A leading '.' marks the pattern as unanchored on the left. */
820 if (host_pattern[0] == '.')
822 url->unanchored |= ANCHOR_LEFT;
826 * Split domain into components
828 url->dbuffer = strdup(host_pattern);
829 if (NULL == url->dbuffer)
832 return JB_ERR_MEMORY;
/* Lower-case the private copy in place; host_pattern itself is const
 * and is not modified. */
838 for (p = url->dbuffer; *p ; p++)
840 *p = (char)privoxy_tolower(*p);
844 * Split the domain name into components
846 url->dcount = ssplit(url->dbuffer, ".", v, SZ(v));
851 return JB_ERR_MEMORY;
853 else if (url->dcount != 0)
856 * Save a copy of the pointers in dvec
858 size = (size_t)url->dcount * sizeof(*url->dvec);
860 url->dvec = malloc_or_die(size);
/* Only the dcount pointers are copied; presumably they point into
 * url->dbuffer, so dbuffer has to stay allocated as long as dvec is
 * in use - verify against ssplit(). */
862 memcpy(url->dvec, v, size);
865 * else dcount == 0 in which case we needn't do anything,
866 * since dvec will never be accessed and the pattern will
873 /*********************************************************************
875 * Function : simplematch
877 * Description : String matching, with a (greedy) '*' wildcard that
878 * stands for zero or more arbitrary characters and
879 * character classes in [], which take both enumerations
883 * 1 : pattern = pattern for matching
884 * 2 : text = text to be matched
886 * Returns : 0 if match, else nonzero
888 *********************************************************************/
889 static int simplematch(const char *pattern, const char *text)
891 const unsigned char *pat = (const unsigned char *)pattern;
892 const unsigned char *txt = (const unsigned char *)text;
893 const unsigned char *fallback = pat;
896 unsigned char lastchar = 'a';
898 unsigned char charmap[32];
903 /* EOF pattern but !EOF text? */
916 /* '*' in the pattern? */
920 /* The pattern ends afterwards? Speed up the return. */
926 /* Else, set wildcard mode and remember position after '*' */
931 /* Character range specification? */
934 memset(charmap, '\0', sizeof(charmap));
936 while (*++pat != ']')
942 else if (*pat == '-')
944 if ((*++pat == ']') || *pat == '\0')
948 for (i = lastchar; i <= *pat; i++)
950 charmap[i / 8] |= (unsigned char)(1 << (i % 8));
955 charmap[*pat / 8] |= (unsigned char)(1 << (*pat % 8));
959 } /* -END- if Character range specification */
963 * Char match, or char range match?
967 || ((*pat == ']') && (charmap[*txt / 8] & (1 << (*txt % 8)))))
977 * No match && no wildcard: No luck
981 else if (pat != fallback)
984 * Increment text pointer if in char range matching
991 * Wildcard mode && nonmatch beyond fallback: Rewind pattern
995 * Restart matching from current text pointer
1002 /* Cut off extra '*'s */
1003 if (*pat == '*') pat++;
1005 /* If this is the pattern's end, fine! */
1011 /*********************************************************************
1013 * Function : simple_domaincmp
1015 * Description : Domain-wise Compare fqdn's. The comparison is
1016 * both left- and right-anchored. The individual
1017 * domain names are compared with simplematch().
1018 * This is only used by domain_match.
1021 * 1 : pv = array of patterns to compare
1022 * 2 : fv = array of domain components to compare
1023 * 3 : len = length of the arrays (both arrays are the
1024 * same length - if they weren't, it couldn't
1025 * possibly be a match).
1027 * Returns : 0 => domains are equivalent, else no match.
1029 *********************************************************************/
1030 static int simple_domaincmp(char **pv, char **fv, int len)
1034 for (n = 0; n < len; n++)
1036 if (simplematch(pv[n], fv[n]))
1047 /*********************************************************************
1049 * Function : domain_match
1051 * Description : Domain-wise Compare fqdn's. Governed by the bitmap in
1052 * pattern->unanchored, the comparison is un-, left-,
1053 * right-anchored, or both.
1054 * The individual domain names are compared with
1058 * 1 : pattern = a domain that may contain a '*' as a wildcard.
1059 * 2 : fqdn = domain name against which the patterns are compared.
1061 * Returns : 0 => domains are equivalent, else no match.
1063 *********************************************************************/
1064 static int domain_match(const struct url_spec *pattern, const struct http_request *fqdn)
1066 char **pv, **fv; /* vectors */
1068 int unanchored = pattern->unanchored & (ANCHOR_RIGHT | ANCHOR_LEFT);
1070 plen = pattern->dcount;
1071 flen = fqdn->dcount;
1075 /* fqdn is too short to match this pattern */
1082 if (unanchored == ANCHOR_LEFT)
1087 * Convert this into a fully anchored pattern with
1088 * the fqdn and pattern the same length
1090 fv += (flen - plen); /* flen - plen >= 0 due to check above */
1091 return simple_domaincmp(pv, fv, plen);
1093 else if (unanchored == 0)
1095 /* Fully anchored, check length */
1100 return simple_domaincmp(pv, fv, plen);
1102 else if (unanchored == ANCHOR_RIGHT)
1104 /* Left anchored, ignore all extra in fqdn */
1105 return simple_domaincmp(pv, fv, plen);
1111 int maxn = flen - plen;
1112 for (n = 0; n <= maxn; n++)
1114 if (!simple_domaincmp(pv, fv, plen))
1119 * Doesn't match from start of fqdn
1120 * Try skipping first part of fqdn
1128 #endif /* def FEATURE_EXTENDED_HOST_PATTERNS */
1131 /*********************************************************************
1133 * Function : create_url_spec
1135 * Description : Creates a "url_spec" structure from a string.
1136 * When finished, free with free_url_spec().
1139 * 1 : url = Target url_spec to be filled in. Will be
1140 * zeroed before use.
1141 * 2 : buf = Source pattern, null terminated. NOTE: The
1142 * contents of this buffer are destroyed by this
1143 * function. If this function succeeds, the
1144 * buffer is copied to url->spec. If this
1145 * function fails, the contents of the buffer
1148 * Returns : JB_ERR_OK - Success
1149 * JB_ERR_MEMORY - Out of memory
1150 * JB_ERR_PARSE - Cannot parse regex (Detailed message
1151 * written to system log)
1153 *********************************************************************/
1154 jb_err create_url_spec(struct url_spec *url, char *buf)
1159 memset(url, '\0', sizeof(*url));
1161 /* Remember the original specification for the CGI pages. */
1162 url->spec = strdup(buf);
1163 if (NULL == url->spec)
1165 return JB_ERR_MEMORY;
1168 /* Is it a tag pattern? */
1169 if (0 == strncmpic(url->spec, "TAG:", 4))
1171 /* The pattern starts with the first character after "TAG:" */
1172 const char *tag_pattern = buf + 4;
1173 return compile_pattern(tag_pattern, NO_ANCHORING, url, &url->tag_regex);
1176 /* If it isn't a tag pattern it must be an URL pattern. */
1177 return compile_url_pattern(url, buf);
1181 /*********************************************************************
1183 * Function : free_url_spec
1185 * Description : Called from the "unloaders". Freez the url
1186 * structure elements.
1189 * 1 : url = pointer to a url_spec structure.
1193 *********************************************************************/
1194 void free_url_spec(struct url_spec *url)
1196 if (url == NULL) return;
1199 #ifdef FEATURE_EXTENDED_HOST_PATTERNS
1200 if (url->host_regex)
1202 regfree(url->host_regex);
1203 freez(url->host_regex);
1206 freez(url->dbuffer);
1209 #endif /* ndef FEATURE_EXTENDED_HOST_PATTERNS */
1210 freez(url->port_list);
1218 regfree(url->tag_regex);
1219 freez(url->tag_regex);
1224 /*********************************************************************
1226 * Function : port_matches
1228 * Description : Compares a port against a port list.
1231 * 1 : port = The port to check.
1232 * 2 : port_list = The list of ports to compare with.
1234 * Returns : TRUE for yes, FALSE otherwise.
1236 *********************************************************************/
1237 static int port_matches(const int port, const char *port_list)
1239 return ((NULL == port_list) || match_portlist(port_list, port));
1243 /*********************************************************************
1245 * Function : host_matches
1247 * Description : Compares a host against a host pattern.
1250 * 1 : http = The parsed request whose host is matched
1251 * 2 : pattern = The URL pattern
1253 * Returns : TRUE for yes, FALSE otherwise.
1255 *********************************************************************/
1256 static int host_matches(const struct http_request *http,
1257 const struct url_spec *pattern)
1259 #ifdef FEATURE_EXTENDED_HOST_PATTERNS
1260 return ((NULL == pattern->host_regex)
1261 || (0 == regexec(pattern->host_regex, http->host, 0, NULL, 0)));
1263 return ((NULL == pattern->dbuffer) || (0 == domain_match(pattern, http)));
1268 /*********************************************************************
1270 * Function : path_matches
1272 * Description : Compares a path against a path pattern.
1275 * 1 : path = The path to match
1276 * 2 : pattern = The URL pattern
1278 * Returns : TRUE for yes, FALSE otherwise.
1280 *********************************************************************/
1281 static int path_matches(const char *path, const struct url_spec *pattern)
1283 return ((NULL == pattern->preg)
1284 || (0 == regexec(pattern->preg, path, 0, NULL, 0)));
1288 /*********************************************************************
1290 * Function : url_match
1292 * Description : Compare a URL against a URL pattern.
1295 * 1 : pattern = a URL pattern
1296 * 2 : http = The parsed request (port, host and path) to match
1298 * Returns : Nonzero if the URL matches the pattern, else 0.
1300 *********************************************************************/
1301 int url_match(const struct url_spec *pattern,
1302 const struct http_request *http)
1304 if (pattern->tag_regex != NULL)
1306 /* It's a tag pattern and shouldn't be matched against URLs */
1310 return (port_matches(http->port, pattern->port_list)
1311 && host_matches(http, pattern) && path_matches(http->path, pattern));
1316 /*********************************************************************
1318 * Function : match_portlist
1320 * Description : Check if a given number is covered by a comma
1321 * separated list of numbers and ranges (a,b-c,d,..)
1324 * 1 : portlist = String with list
1325 * 2 : port = port to check
1327 * Returns : 0 => no match
1330 *********************************************************************/
1331 int match_portlist(const char *portlist, int port)
1333 char *min, *max, *next, *portlist_copy;
/* Work on a private copy of the list, since portlist itself is const.
 * NOTE(review): no NULL check on the strdup() result is visible before
 * strchr() uses it below - confirm that allocation failure is handled. */
1335 min = portlist_copy = strdup(portlist);
1338 * Zero-terminate first item and remember offset for next
1340 if (NULL != (next = strchr(portlist_copy, (int) ',')))
1346 * Loop through all items, checking for match
1350 if (NULL == (max = strchr(min, (int) '-')))
1353 * No dash, check for equality
/* atoi() yields 0 when the item has no leading digits, so a malformed
 * item compares as port 0. */
1355 if (port == atoi(min))
1357 freez(portlist_copy);
1364 * This is a range, so check if between min and max,
1365 * or, if max was omitted, between min and 65K
/* atoi(max) == 0 is treated as "no upper bound given" and replaced by
 * 65535. NOTE(review): max presumably points past the '-' by this point;
 * that step is not visible in this excerpt. */
1368 if (port >= atoi(min) && port <= (atoi(max) ? atoi(max) : 65535))
1370 freez(portlist_copy);
1382 * Zero-terminate next item and remember offset for n+1
1384 if ((NULL != next) && (NULL != (next = strchr(next, (int) ','))))
/* The copy is released on each exit path visible here (both match
 * branches above and this final one). */
1390 freez(portlist_copy);
1396 /*********************************************************************
1398 * Function : parse_forwarder_address
1400 * Description : Parse out the host and port from a forwarder address.
1403 * 1 : address = The forwarder address to parse.
1404 * 2 : hostname = Used to return the hostname. NULL on error.
1405 * 3 : port = Used to return the port. Untouched if no port
1408 * Returns : JB_ERR_OK on success
1409 * JB_ERR_MEMORY on out of memory
1410 * JB_ERR_PARSE on malformed address.
1412 *********************************************************************/
1413 jb_err parse_forwarder_address(char *address, char **hostname, int *port)
1417 if ((*address == '[') && (NULL == strchr(address, ']')))
1419 /* XXX: Should do some more validity checks here. */
1420 return JB_ERR_PARSE;
1423 *hostname = strdup(address);
1424 if (NULL == *hostname)
1426 return JB_ERR_MEMORY;
1429 if ((**hostname == '[') && (NULL != (p = strchr(*hostname, ']'))))
1432 memmove(*hostname, (*hostname + 1), (size_t)(p - *hostname));
1435 *port = (int)strtol(++p, NULL, 0);
1438 else if (NULL != (p = strchr(*hostname, ':')))
1441 *port = (int)strtol(p, NULL, 0);