1 /*********************************************************************
3 * File : $Source: /cvsroot/ijbswa/current/urlmatch.c,v $
5 * Purpose : Declares functions to match URLs against URL patterns.
8 * Copyright : Written by and Copyright (C) 2001-2020
9 * the Privoxy team. https://www.privoxy.org/
11 * Based on the Internet Junkbuster originally written
12 * by and Copyright (C) 1997 Anonymous Coders and
13 * Junkbusters Corporation. http://www.junkbusters.com
15 * This program is free software; you can redistribute it
16 * and/or modify it under the terms of the GNU General
17 * Public License as published by the Free Software
18 * Foundation; either version 2 of the License, or (at
19 * your option) any later version.
21 * This program is distributed in the hope that it will
22 * be useful, but WITHOUT ANY WARRANTY; without even the
23 * implied warranty of MERCHANTABILITY or FITNESS FOR A
24 * PARTICULAR PURPOSE. See the GNU General Public
25 * License for more details.
27 * The GNU General Public License should be included with
28 * this file. If not, you can view it at
29 * http://www.gnu.org/copyleft/gpl.html
30 * or write to the Free Software Foundation, Inc., 59
31 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
33 *********************************************************************/
40 #include <sys/types.h>
/* Forward declarations of the host pattern compilers that are defined later in this file. */
65 static jb_err compile_vanilla_host_pattern(struct pattern_spec *url, const char *host_pattern);
66 #ifdef FEATURE_PCRE_HOST_PATTERNS
67 static jb_err compile_pcre_host_pattern(struct pattern_spec *url, const char *host_pattern);
70 /*********************************************************************
72 * Function : free_http_request
74 * Description : Frees the members of a http_request structure,
* for example hostport and host_ip_addr_str, using freez().
77 * 1 : http = points to a http_request structure to free
81 *********************************************************************/
82 void free_http_request(struct http_request *http)
/* Release the members one at a time. */
91 freez(http->hostport);
94 freez(http->host_ip_addr_str);
101 /*********************************************************************
103 * Function : init_domain_components
105 * Description : Splits the domain name so we can compare it
106 * against wildcards. It used to be part of
107 * parse_http_url, but was separated because the
108 * same code is required in chat() in case of
109 * intercepted requests.
112 * 1 : http = pointer to the http structure to hold elements.
* http->host is read; dbuffer, dcount and dvec are filled in.
114 * Returns : JB_ERR_OK on success
115 * JB_ERR_PARSE on malformed command/URL
116 * or if the host has more than SZ(vec) components or none at all.
118 *********************************************************************/
119 jb_err init_domain_components(struct http_request *http)
/* Scratch vector for the component pointers; its size is the limit passed to ssplit(). */
121 char *vec[BUFFER_SIZE];
/* Work on a private copy of the host name so that http->host itself is not modified. */
125 http->dbuffer = strdup_or_die(http->host);
127 /* map to lower case */
128 for (p = http->dbuffer; *p ; p++)
130 *p = (char)privoxy_tolower(*p);
133 /* split the domain name into components */
134 http->dcount = ssplit(http->dbuffer, ".", vec, SZ(vec));
136 if (http->dcount <= 0)
139 * Error: More than SZ(vec) components in domain
140 * or: no components in domain
142 log_error(LOG_LEVEL_ERROR, "More than SZ(vec) components in domain or none at all.");
146 /* save a copy of the pointers in dvec */
/* Only the dcount entries that ssplit() reported are copied, not the whole scratch vector. */
147 size = (size_t)http->dcount * sizeof(*http->dvec);
149 http->dvec = malloc_or_die(size);
151 memcpy(http->dvec, vec, size);
157 /*********************************************************************
159 * Function : url_requires_percent_encoding
161 * Description : Checks if an URL contains invalid characters
162 * according to RFC 3986 that should be percent-encoded.
163 * Does not verify whether or not the passed string
164 * actually is a valid URL.
167 * 1 : url = URL to check
169 * Returns : True if the URL contains at least one character that
* is not allowed unencoded, false otherwise.
* NOTE(review): inferred from the function name and the
* check against allowed_characters - confirm.
171 *********************************************************************/
172 int url_requires_percent_encoding(const char *url)
/*
 * Lookup table indexed by character code (0-127). A non-NUL entry
 * means the character may appear in an URL as is, a '\0' entry means
 * it may not. Every byte of the URL is read as unsigned char, so
 * values above 127 fail the range check instead of indexing the table.
 */
174 static const char allowed_characters[128] = {
175 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
176 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
177 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
178 '\0', '\0', '\0', '!', '\0', '#', '$', '%', '&', '\'',
179 '(', ')', '*', '+', ',', '-', '.', '/', '0', '1',
180 '2', '3', '4', '5', '6', '7', '8', '9', ':', ';',
181 '\0', '=', '\0', '?', '@', 'A', 'B', 'C', 'D', 'E',
182 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
183 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y',
184 'Z', '[', '\0', ']', '\0', '_', '\0', 'a', 'b', 'c',
185 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
186 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
187 'x', 'y', 'z', '\0', '\0', '\0', '~', '\0'
192 const unsigned int i = (unsigned char)*url++;
193 if (i >= sizeof(allowed_characters) || '\0' == allowed_characters[i])
204 /*********************************************************************
206 * Function : parse_http_url
208 * Description : Parse out the host and port from the URL. Find the
209 * hostname & path, port (if ':'), and/or password (if '@')
* User name and password are only skipped, they are not stored.
* The host name is finally split into its components by
* init_domain_components().
212 * 1 : url = URL (or is it URI?) to break down
213 * 2 : http = pointer to the http structure to hold elements.
214 * Must be initialized with valid values (like NULLs).
215 * 3 : require_protocol = Whether or not URLs without
216 * protocol are acceptable.
218 * Returns : JB_ERR_OK on success
219 * JB_ERR_PARSE on malformed command/URL
220 * or if init_domain_components() fails.
222 *********************************************************************/
223 jb_err parse_http_url(const char *url, struct http_request *http, int require_protocol)
225 int host_available = 1; /* A proxy can dream. */
228 * Save our initial URL
230 http->url = strdup_or_die(url);
233 * Check for * URI. If found, we're done.
235 if (*http->url == '*')
237 http->path = strdup_or_die("*");
238 http->hostport = strdup_or_die("");
239 if (http->url[1] != '\0')
248 * Split URL into protocol, hostport, path.
255 buf = strdup_or_die(url);
257 /* Find the start of the URL in our scratch space */
259 if (strncmpic(url_noproto, "http://", 7) == 0)
263 else if (strncmpic(url_noproto, "https://", 8) == 0)
266 * Should only happen when called from cgi_show_url_info()
267 * or when the request was https-inspected and the request
268 * line got rewritten.
273 else if (*url_noproto == '/')
276 * Short request line without protocol and host.
277 * Most likely because the client's request
278 * was intercepted and redirected into Privoxy.
283 else if (require_protocol)
289 url_path = strchr(url_noproto, '/');
290 if (url_path != NULL)
295 * If FEATURE_HTTPS_INSPECTION isn't available, ignore the
296 * path for https URLs so that we get consistent behaviour
297 * if a https URL is parsed. When the URL is actually
298 * retrieved, https hides the path part.
300 http->path = strdup_or_die(
301 #ifndef FEATURE_HTTPS_INSPECTION
307 http->hostport = string_tolower(url_noproto);
312 * Repair broken HTTP requests that don't contain a path,
313 * or CONNECT requests
315 http->path = strdup_or_die("/");
316 http->hostport = string_tolower(url_noproto);
321 if (http->hostport == NULL)
329 /* Without host, there is nothing left to do here */
334 * Split hostport into user/password (ignored), host, port.
341 buf = strdup_or_die(http->hostport);
343 /* check if url contains username and/or password */
344 host = strchr(buf, '@');
347 /* Contains username/password, skip it and the @ sign. */
352 /* No username or password. */
356 /* Move after hostname before port number */
359 /* Numeric IPv6 address delimited by brackets */
361 port = strchr(host, ']');
365 /* Missing closing bracket */
376 else if (*port != ':')
378 /* Garbage after closing bracket */
385 /* Plain non-escaped hostname */
386 port = strchr(host, ':');
389 /* check if url contains port */
395 /* Terminate hostname and point to start of port string */
397 parsed_port = strtol(port, &endptr, 10);
398 if ((parsed_port <= 0) || (parsed_port > 65535) || (*endptr != '\0'))
400 log_error(LOG_LEVEL_ERROR, "Invalid port in URL: %s.", url);
404 http->port = (int)parsed_port;
408 /* No port specified. */
409 http->port = (http->ssl ? 443 : 80);
412 http->host = strdup_or_die(host);
417 /* Split domain name so we can compare it against wildcards */
418 return init_domain_components(http);
423 /*********************************************************************
425 * Function : unknown_method
427 * Description : Checks whether a method is unknown, i.e. whether it
* is missing from the built-in list of known HTTP and
* WebDAV methods. Methods are compared with strcmpic().
430 * 1 : method = points to a http method
432 * Returns : TRUE if it's unknown, FALSE otherwise.
434 *********************************************************************/
435 static int unknown_method(const char *method)
437 static const char * const known_http_methods[] = {
438 /* Basic HTTP request type */
439 "GET", "HEAD", "POST", "PUT", "DELETE", "OPTIONS", "TRACE", "CONNECT",
440 /* webDAV extensions (RFC2518) */
441 "PROPFIND", "PROPPATCH", "MOVE", "COPY", "MKCOL", "LOCK", "UNLOCK",
443 * Microsoft webDAV extension for Exchange 2000. See:
444 * http://lists.w3.org/Archives/Public/w3c-dist-auth/2002JanMar/0001.html
445 * http://msdn.microsoft.com/library/en-us/wss/wss/_webdav_methods.asp
447 "BCOPY", "BMOVE", "BDELETE", "BPROPFIND", "BPROPPATCH",
449 * Another Microsoft webDAV extension for Exchange 2000. See:
450 * http://systems.cs.colorado.edu/grunwald/MobileComputing/Papers/draft-cohen-gena-p-base-00.txt
451 * http://lists.w3.org/Archives/Public/w3c-dist-auth/2002JanMar/0001.html
452 * http://msdn.microsoft.com/library/en-us/wss/wss/_webdav_methods.asp
454 "SUBSCRIBE", "UNSUBSCRIBE", "NOTIFY", "POLL",
456 * Yet another WebDAV extension, this time for
457 * Web Distributed Authoring and Versioning (RFC3253)
459 "VERSION-CONTROL", "REPORT", "CHECKOUT", "CHECKIN", "UNCHECKOUT",
460 "MKWORKSPACE", "UPDATE", "LABEL", "MERGE", "BASELINE-CONTROL", "MKACTIVITY",
462 * The PATCH method is defined by RFC5789, the format of the
463 * actual patch in the body depends on the application, but from
464 * Privoxy's point of view it doesn't matter.
470 for (i = 0; i < SZ(known_http_methods); i++)
472 if (0 == strcmpic(method, known_http_methods[i]))
483 /*********************************************************************
485 * Function : normalize_http_version
487 * Description : Take a supported HTTP version string and remove
488 * leading zeroes etc., reject unsupported versions.
490 * This is an explicit RFC 2616 (3.1) MUST and
491 * RFC 7230 mandates that intermediaries send their
492 * own HTTP-version in forwarded messages.
495 * 1 : http_version = HTTP version string. The normalized
* version is written back into this buffer.
497 * Returns : JB_ERR_OK on success
498 * JB_ERR_PARSE if the HTTP version is unsupported
500 *********************************************************************/
501 static jb_err normalize_http_version(char *http_version)
503 unsigned int major_version;
504 unsigned int minor_version;
/* Both numbers have to be converted, otherwise the string is not of the form HTTP/<major>.<minor>. */
506 if (2 != sscanf(http_version, "HTTP/%u.%u", &major_version, &minor_version))
508 log_error(LOG_LEVEL_ERROR, "Unsupported HTTP version: %s", http_version);
512 if (major_version != 1 || (minor_version != 0 && minor_version != 1))
514 log_error(LOG_LEVEL_ERROR, "The only supported HTTP "
515 "versions are 1.0 and 1.1. This rules out: %s", http_version);
/*
 * The canonical form "HTTP/x.y" takes 8 characters plus the terminating
 * NUL, hence the size of 9. The assert documents that the input string
 * is at least that long, so the in-place rewrite cannot grow it.
 */
519 assert(strlen(http_version) >= 8);
520 snprintf(http_version, 9, "HTTP/%u.%u", major_version, minor_version);
527 /*********************************************************************
529 * Function : parse_http_request
531 * Description : Parses a HTTP request line. The line is split at
532 * spaces and line ends into method, URL and HTTP version.
* Unknown methods and unsupported HTTP versions are
* checked for, the URL is handed to parse_http_url().
* CONNECT requests are flagged as ssl and are the only
* ones whose URL does not require a protocol.
535 * 1 : req = HTTP request line to break down
536 * 2 : http = pointer to the http structure to hold elements
538 * Returns : JB_ERR_OK on success
539 * JB_ERR_CGI_PARAMS on malformed command/URL
540 * or >100 domains deep.
* NOTE(review): the helpers called here document
* JB_ERR_PARSE as their failure code - confirm which
* code is actually returned.
542 *********************************************************************/
543 jb_err parse_http_request(const char *req, struct http_request *http)
/* Start with an all-zero structure. */
550 memset(http, '\0', sizeof(*http));
/* ssplit() gets a scratch copy as req itself is const. */
552 buf = strdup_or_die(req);
554 n = ssplit(buf, " \r\n", v, SZ(v));
562 * Fail in case of unknown methods
563 * which we might not handle correctly.
565 * XXX: There should be a config option
566 * to forward requests with unknown methods
567 * anyway. Most of them don't need special
570 if (unknown_method(v[0]))
572 log_error(LOG_LEVEL_ERROR, "Unknown HTTP method detected: %s", v[0]);
577 if (JB_ERR_OK != normalize_http_version(v[2]))
583 http->ssl = !strcmpic(v[0], "CONNECT");
585 err = parse_http_url(v[1], http, !http->ssl);
593 * Copy the details into the structure
595 http->cmd = strdup_or_die(req);
596 http->gpc = strdup_or_die(v[0]);
597 http->version = strdup_or_die(v[2]);
598 http->ocmd = strdup_or_die(http->cmd);
608 /*********************************************************************
610 * Function : compile_pattern
612 * Description : Compiles a host, domain or TAG pattern.
* This variant uses pcre2_compile() and compiles the
* pattern case-insensitively (PCRE2_CASELESS).
615 * 1 : pattern = The pattern to compile.
616 * 2 : anchoring = How the regex should be modified
617 * before compilation. Can be either
618 * one of NO_ANCHORING, LEFT_ANCHORED,
619 * RIGHT_ANCHORED or RIGHT_ANCHORED_HOST.
620 * 3 : url = In case of failures, the spec member is
621 * logged and the structure freed.
622 * 4 : regex = Where the compiled regex should be stored.
624 * Returns : JB_ERR_OK - Success
625 * JB_ERR_PARSE - Cannot parse regex
627 *********************************************************************/
628 static jb_err compile_pattern(const char *pattern, enum regex_anchoring anchoring,
629 struct pattern_spec *url, pcre2_code **regex)
632 const char *fmt = NULL;
635 PCRE2_SIZE error_offset;
640 if (pattern[0] == '\0')
654 case RIGHT_ANCHORED_HOST:
661 log_error(LOG_LEVEL_FATAL,
662 "Invalid anchoring in compile_pattern %d", anchoring);
/*
 * rebuf receives the pattern wrapped in the anchoring template fmt.
 * Presumably fmt contains a single %s, in which case the sum of both
 * lengths leaves room for the terminating NUL - TODO confirm.
 */
664 rebuf_size = strlen(pattern) + strlen(fmt);
665 rebuf = malloc_or_die(rebuf_size);
667 snprintf(rebuf, rebuf_size, fmt, pattern);
669 *regex = pcre2_compile((const unsigned char *)rebuf,
670 PCRE2_ZERO_TERMINATED, PCRE2_CASELESS, &errcode,
671 &error_offset, NULL);
/*
 * NOTE(review): the last %s is fed with rebuf, which at this point
 * holds the anchored pattern and not a pcre2 error message. Confirm
 * whether errcode should be translated with
 * pcre2_get_error_message() first.
 */
674 log_error(LOG_LEVEL_ERROR, "error compiling %s from %s: %s",
675 pattern, url->spec, rebuf);
681 #ifndef DISABLE_PCRE_JIT_COMPILATION
682 /* Try to enable JIT compilation but continue if it's unsupported. */
683 if ((ret = pcre2_jit_compile(*regex, PCRE2_JIT_COMPLETE)) &&
684 (ret != PCRE2_ERROR_JIT_BADOPTION))
686 log_error(LOG_LEVEL_ERROR,
687 "Unexpected error enabling JIT compilation for %s from %s: %s",
688 pattern, url->spec, rebuf);
701 /*********************************************************************
703 * Function : compile_pattern
705 * Description : Compiles a host, domain or TAG pattern.
* This variant uses regcomp() with extended syntax,
* no sub-match reporting and case-insensitive matching.
708 * 1 : pattern = The pattern to compile.
709 * 2 : anchoring = How the regex should be modified
710 * before compilation. Can be either
711 * one of NO_ANCHORING, LEFT_ANCHORED,
712 * RIGHT_ANCHORED or RIGHT_ANCHORED_HOST.
713 * 3 : url = In case of failures, the spec member is
714 * logged and the structure freed.
715 * 4 : regex = Where the compiled regex should be stored.
717 * Returns : JB_ERR_OK - Success
718 * JB_ERR_PARSE - Cannot parse regex
720 *********************************************************************/
721 static jb_err compile_pattern(const char *pattern, enum regex_anchoring anchoring,
722 struct pattern_spec *url, regex_t **regex)
725 const char *fmt = NULL;
731 if (pattern[0] == '\0')
745 case RIGHT_ANCHORED_HOST:
752 log_error(LOG_LEVEL_FATAL,
753 "Invalid anchoring in compile_pattern %d", anchoring);
/* rebuf receives the pattern wrapped in the anchoring template fmt. */
755 rebuf_size = strlen(pattern) + strlen(fmt);
756 rebuf = malloc_or_die(rebuf_size);
757 *regex = zalloc_or_die(sizeof(**regex));
759 snprintf(rebuf, rebuf_size, fmt, pattern);
761 errcode = regcomp(*regex, rebuf, (REG_EXTENDED|REG_NOSUB|REG_ICASE));
/*
 * rebuf is reused for the error text. regerror() returns the size the
 * complete message needs, which may exceed rebuf_size, so the index
 * used for the terminator is clamped to the last byte of rebuf.
 */
765 size_t errlen = regerror(errcode, *regex, rebuf, rebuf_size);
766 if (errlen > (rebuf_size - (size_t)1))
768 errlen = rebuf_size - (size_t)1;
770 rebuf[errlen] = '\0';
771 log_error(LOG_LEVEL_ERROR, "error compiling %s from %s: %s",
772 pattern, url->spec, rebuf);
773 free_pattern_spec(url);
786 /*********************************************************************
788 * Function : compile_url_pattern
790 * Description : Compiles the three parts of an URL pattern:
* the path regex, the port list and the host pattern.
793 * 1 : url = Target pattern_spec to be filled in.
794 * 2 : buf = The url pattern to compile. Will be messed up.
796 * Returns : JB_ERR_OK - Success
797 * JB_ERR_MEMORY - Out of memory
798 * JB_ERR_PARSE - Cannot parse regex
800 *********************************************************************/
801 static jb_err compile_url_pattern(struct pattern_spec *url, char *buf)
/* Length of the "PCRE-HOST-PATTERN:" prefix that is checked for and removed below. */
804 const size_t prefix_length = 18;
806 #ifdef FEATURE_PCRE_HOST_PATTERNS
807 if (strncmpic(buf, "PCRE-HOST-PATTERN:", prefix_length) == 0)
809 url->pattern.url_spec.host_regex_type = PCRE_HOST_PATTERN;
810 /* Overwrite the "PCRE-HOST-PATTERN:" prefix */
811 memmove(buf, buf+prefix_length, strlen(buf+prefix_length)+1);
815 url->pattern.url_spec.host_regex_type = VANILLA_HOST_PATTERN;
818 if (strncmpic(buf, "PCRE-HOST-PATTERN:", prefix_length) == 0)
820 log_error(LOG_LEVEL_ERROR,
821 "PCRE-HOST-PATTERN detected while Privoxy has been compiled "
822 "without FEATURE_PCRE_HOST_PATTERNS: %s",
824 /* Overwrite the "PCRE-HOST-PATTERN:" prefix */
825 memmove(buf, buf+prefix_length, strlen(buf+prefix_length)+1);
827 * The pattern will probably not work as expected.
828 * We don't simply return JB_ERR_PARSE here so the
829 * regression tests can be loaded with and without
830 * FEATURE_PCRE_HOST_PATTERNS.
835 p = strchr(buf, '/');
839 * Only compile the regex if it consists of more than
840 * a single slash, otherwise it wouldn't affect the result.
845 * XXX: does it make sense to compile the slash at the beginning?
847 jb_err err = compile_pattern(p, LEFT_ANCHORED, url, &url->pattern.url_spec.preg);
849 if (JB_ERR_OK != err)
858 * IPv6 numeric hostnames can contain colons, thus we need
859 * to delimit the hostname before the real port separator.
860 * As brackets are already used in the hostname pattern,
861 * we use angle brackets ('<', '>') instead.
863 if ((buf[0] == '<') && (NULL != (p = strchr(buf + 1, '>'))))
870 /* IPv6 address without port number */
875 /* Garbage after address delimiter */
881 p = strchr(buf, ':');
887 url->pattern.url_spec.port_list = strdup_or_die(p);
891 url->pattern.url_spec.port_list = NULL;
896 #ifdef FEATURE_PCRE_HOST_PATTERNS
897 if (url->pattern.url_spec.host_regex_type == PCRE_HOST_PATTERN)
899 return compile_pcre_host_pattern(url, buf);
904 return compile_vanilla_host_pattern(url, buf);
913 #ifdef FEATURE_PCRE_HOST_PATTERNS
914 /*********************************************************************
916 * Function : compile_pcre_host_pattern
918 * Description : Parses and compiles a pcre host pattern.
* Thin wrapper around compile_pattern() that uses
* RIGHT_ANCHORED_HOST anchoring and stores the result
* in url->pattern.url_spec.host_regex.
921 * 1 : url = Target pattern_spec to be filled in.
922 * 2 : host_pattern = Host pattern to compile.
924 * Returns : JB_ERR_OK - Success
925 * JB_ERR_MEMORY - Out of memory
926 * JB_ERR_PARSE - Cannot parse regex
928 *********************************************************************/
929 static jb_err compile_pcre_host_pattern(struct pattern_spec *url, const char *host_pattern)
931 return compile_pattern(host_pattern, RIGHT_ANCHORED_HOST, url, &url->pattern.url_spec.host_regex);
933 #endif /* def FEATURE_PCRE_HOST_PATTERNS */
936 /*********************************************************************
938 * Function : compile_vanilla_host_pattern
940 * Description : Parses and "compiles" an old-school host pattern.
* The pattern is lower-cased and split at the dots into
* components which are stored in dbuffer, dcount and dvec
* of url->pattern.url_spec.
943 * 1 : url = Target pattern_spec to be filled in.
944 * 2 : host_pattern = Host pattern to parse.
946 * Returns : JB_ERR_OK - Success
947 * JB_ERR_PARSE - Cannot parse regex
949 *********************************************************************/
950 static jb_err compile_vanilla_host_pattern(struct pattern_spec *url, const char *host_pattern)
/*
 * The bits in "unanchored" mark the sides on which the pattern is NOT
 * anchored: a trailing dot sets ANCHOR_RIGHT, a leading dot ANCHOR_LEFT.
 *
 * NOTE(review): strlen(host_pattern) - 1 wraps around for an empty
 * string, so this relies on the caller never passing "" - confirm.
 */
959 if (host_pattern[strlen(host_pattern) - 1] == '.')
961 url->pattern.url_spec.unanchored |= ANCHOR_RIGHT;
963 if (host_pattern[0] == '.')
965 url->pattern.url_spec.unanchored |= ANCHOR_LEFT;
969 * Split domain into components
971 url->pattern.url_spec.dbuffer = strdup_or_die(host_pattern);
976 for (p = url->pattern.url_spec.dbuffer; *p ; p++)
978 *p = (char)privoxy_tolower(*p);
982 * Split the domain name into components
984 url->pattern.url_spec.dcount = ssplit(url->pattern.url_spec.dbuffer, ".", v, SZ(v));
986 if (url->pattern.url_spec.dcount < 0)
988 free_pattern_spec(url);
991 else if (url->pattern.url_spec.dcount != 0)
994 * Save a copy of the pointers in dvec
996 size = (size_t)url->pattern.url_spec.dcount * sizeof(*url->pattern.url_spec.dvec);
998 url->pattern.url_spec.dvec = malloc_or_die(size);
1000 memcpy(url->pattern.url_spec.dvec, v, size);
1003 * else dcount == 0 in which case we needn't do anything,
1004 * since dvec will never be accessed and the pattern will
1005 * match all domains.
1011 /*********************************************************************
1013 * Function : simplematch
1015 * Description : String matching, with a (greedy) '*' wildcard that
1016 * stands for zero or more arbitrary characters and
1017 * character classes in [], which take both enumerations
* and ranges.
1021 * 1 : pattern = pattern for matching
1022 * 2 : text = text to be matched
1024 * Returns : 0 if match, else nonzero
1026 *********************************************************************/
1027 static int simplematch(const char *pattern, const char *text)
/* Both strings are walked as unsigned char so that byte values can be used as bitmap indexes. */
1029 const unsigned char *pat = (const unsigned char *)pattern;
1030 const unsigned char *txt = (const unsigned char *)text;
1031 const unsigned char *fallback = pat;
1034 unsigned char lastchar = 'a';
/* Bitmap of the characters in the current class: 32 bytes, one bit per possible byte value. */
1036 unsigned char charmap[32];
1041 /* EOF pattern but !EOF text? */
1054 /* '*' in the pattern? */
1058 /* The pattern ends afterwards? Speed up the return. */
1064 /* Else, set wildcard mode and remember position after '*' */
1069 /* Character range specification? */
1072 memset(charmap, '\0', sizeof(charmap));
1074 while (*++pat != ']')
1080 else if (*pat == '-')
1082 if ((*++pat == ']') || *pat == '\0')
/* Mark every character from the range start up to and including the range end. */
1086 for (i = lastchar; i <= *pat; i++)
1088 charmap[i / 8] |= (unsigned char)(1 << (i % 8));
/* Mark a single enumerated character. */
1093 charmap[*pat / 8] |= (unsigned char)(1 << (*pat % 8));
1097 } /* -END- if Character range specification */
1101 * Char match, or char range match?
1105 || ((*pat == ']') && (charmap[*txt / 8] & (1 << (*txt % 8)))))
1115 * No match && no wildcard: No luck
1119 else if (pat != fallback)
1122 * Increment text pointer if in char range matching
1129 * Wildcard mode && nonmatch beyond fallback: Rewind pattern
1133 * Restart matching from current text pointer
1140 /* Cut off extra '*'s */
1141 if (*pat == '*') pat++;
1143 /* If this is the pattern's end, fine! */
1150 /*********************************************************************
1152 * Function : pcre2_pattern_matches
1154 * Description : Checks if a compiled pcre2 pattern matches a string.
* The match data block needed by pcre2_match() is
* created for each call and freed again afterwards.
1157 * 1 : pattern = The compiled pattern
1158 * 2 : string = The string to check
1160 * Returns : TRUE for yes, FALSE otherwise.
1162 *********************************************************************/
1163 static int pcre2_pattern_matches(const pcre2_code *pattern, const char *string)
1167 pcre2_match_data *pcre2_matches;
1169 assert(pattern != NULL);
1170 assert(string != NULL);
/* Let pcre2 size the match data according to the compiled pattern. */
1174 pcre2_matches = pcre2_match_data_create_from_pattern(pattern, NULL);
1175 if (NULL == pcre2_matches)
1177 log_error(LOG_LEVEL_ERROR,
1178 "Out of memory while matching pattern against %s", string);
1182 ret = pcre2_match(pattern, (const unsigned char *)string, strlen(string),
1183 offset, 0, pcre2_matches, NULL);
/* The result is kept in ret, the match data is no longer needed. */
1185 pcre2_match_data_free(pcre2_matches);
1192 /*********************************************************************
1194 * Function : regex_matches
1196 * Description : Checks if a compiled regex pattern matches a string
1197 * using either pcre2 or pcre1 code.
* One variant delegates to pcre2_pattern_matches(), the
* other calls regexec() without requesting sub-matches.
* Presumably the variant is selected at compile time.
1200 * 1 : pattern = The compiled pattern
1201 * 2 : string = The string to check
1203 * Returns : TRUE for yes, FALSE otherwise.
1205 *********************************************************************/
1206 int regex_matches(const REGEX_TYPE *pattern, const char *string)
1209 return pcre2_pattern_matches(pattern, string);
/* regexec() returns 0 on a match, hence the comparison. */
1211 return (0 == regexec(pattern, string, 0, NULL, 0));
1215 /*********************************************************************
1217 * Function : simple_domaincmp
1219 * Description : Domain-wise Compare fqdn's. The comparison is
1220 * both left- and right-anchored. The individual
1221 * domain names are compared with simplematch().
1222 * This is only used by domain_match.
1225 * 1 : pv = array of patterns to compare
1226 * 2 : fv = array of domain components to compare
1227 * 3 : len = length of the arrays (both arrays are the
1228 * same length - if they weren't, it couldn't
1229 * possibly be a match).
1231 * Returns : 0 => domains are equivalent, else no match.
1233 *********************************************************************/
1234 static int simple_domaincmp(char **pv, char **fv, int len)
/* Compare the arrays element by element. */
1238 for (n = 0; n < len; n++)
/* simplematch() returns 0 on a match, so a nonzero result means this component differs. */
1240 if (simplematch(pv[n], fv[n]))
1251 /*********************************************************************
1253 * Function : domain_match
1255 * Description : Domain-wise Compare fqdn's. Governed by the bitmap in
1256 * p->pattern.url_spec.unanchored, the comparison is un-, left-,
1257 * right-anchored, or both.
1258 * The individual domain names are compared with
* simplematch() (through simple_domaincmp()).
1262 * 1 : p = a domain that may contain a '*' as a wildcard.
1263 * 2 : fqdn = domain name against which the patterns are compared.
1265 * Returns : 0 => domains are equivalent, else no match.
1267 *********************************************************************/
1268 static int domain_match(const struct pattern_spec *p, const struct http_request *fqdn)
1270 char **pv, **fv; /* vectors */
/*
 * Only the two anchor bits are of interest. A set bit means that the
 * pattern is NOT anchored on that side, so ANCHOR_LEFT alone leaves a
 * right-anchored pattern and ANCHOR_RIGHT alone a left-anchored one.
 */
1272 int unanchored = p->pattern.url_spec.unanchored & (ANCHOR_RIGHT | ANCHOR_LEFT);
1274 plen = p->pattern.url_spec.dcount;
1275 flen = fqdn->dcount;
1279 /* fqdn is too short to match this pattern */
1283 pv = p->pattern.url_spec.dvec;
1286 if (unanchored == ANCHOR_LEFT)
1291 * Convert this into a fully anchored pattern with
1292 * the fqdn and pattern the same length
1294 fv += (flen - plen); /* flen - plen >= 0 due to check above */
1295 return simple_domaincmp(pv, fv, plen);
1297 else if (unanchored == 0)
1299 /* Fully anchored, check length */
1304 return simple_domaincmp(pv, fv, plen);
1306 else if (unanchored == ANCHOR_RIGHT)
1308 /* Left anchored, ignore all extra in fqdn */
1309 return simple_domaincmp(pv, fv, plen);
1315 int maxn = flen - plen;
1316 for (n = 0; n <= maxn; n++)
1318 if (!simple_domaincmp(pv, fv, plen))
1323 * Doesn't match from start of fqdn
1324 * Try skipping first part of fqdn
1334 /*********************************************************************
1336 * Function : create_pattern_spec
1338 * Description : Creates a "pattern_spec" structure from a string.
1339 * When finished, free with free_pattern_spec().
* Patterns that start with one of the tag prefixes are
* compiled as unanchored tag regexes, everything else
* is treated as an URL pattern.
1342 * 1 : pattern = Target pattern_spec to be filled in.
1343 * Will be zeroed before use.
1344 * 2 : buf = Source pattern, null terminated. NOTE: The
1345 * contents of this buffer are destroyed by this
1346 * function. If this function succeeds, the
1347 * buffer is copied to pattern->spec. If this
1348 * function fails, the contents of the buffer
* should be considered lost.
1351 * Returns : JB_ERR_OK - Success
1352 * JB_ERR_PARSE - Cannot parse regex (Detailed message
1353 * written to system log)
1355 *********************************************************************/
1356 jb_err create_pattern_spec(struct pattern_spec *pattern, char *buf)
1360 /** The tag pattern prefix to match */
1363 /** The length of the prefix to match */
1364 const size_t prefix_length;
1366 /** The pattern flag */
1367 const unsigned flag;
1369 { "TAG:", 4, PATTERN_SPEC_TAG_PATTERN},
1370 #ifdef FEATURE_CLIENT_TAGS
1371 { "CLIENT-TAG:", 11, PATTERN_SPEC_CLIENT_TAG_PATTERN},
1373 { "NO-REQUEST-TAG:", 15, PATTERN_SPEC_NO_REQUEST_TAG_PATTERN},
1374 { "NO-RESPONSE-TAG:", 16, PATTERN_SPEC_NO_RESPONSE_TAG_PATTERN}
1381 memset(pattern, '\0', sizeof(*pattern));
1383 /* Remember the original specification for the CGI pages. */
1384 pattern->spec = strdup_or_die(buf);
1386 /* Check if it's a tag pattern */
1387 for (i = 0; i < SZ(tag_pattern); i++)
/* The prefix is looked for in the saved copy, the regex itself is taken from buf. */
1389 if (0 == strncmpic(pattern->spec, tag_pattern[i].prefix, tag_pattern[i].prefix_length))
1391 /* The regex starts after the prefix */
1392 const char *tag_regex = buf + tag_pattern[i].prefix_length;
1394 pattern->flags |= tag_pattern[i].flag;
1396 return compile_pattern(tag_regex, NO_ANCHORING, pattern,
1397 &pattern->pattern.tag_regex);
1401 /* If it isn't a tag pattern it must be an URL pattern. */
1402 pattern->flags |= PATTERN_SPEC_URL_PATTERN;
1404 return compile_url_pattern(pattern, buf);
1409 /*********************************************************************
1411 * Function : free_pattern_spec
1413 * Description : Called from the "unloaders". Freez the pattern
1414 * structure elements.
* The structure itself is not among the things
* released in the code below.
1417 * 1 : pattern = pointer to a pattern_spec structure.
* NULL is accepted and ignored.
1421 *********************************************************************/
1422 void free_pattern_spec(struct pattern_spec *pattern)
1424 if (pattern == NULL) return;
1426 freez(pattern->spec);
/* Everything that is not an URL pattern carries a tag regex instead of an url_spec. */
1428 if (!(pattern->flags & PATTERN_SPEC_URL_PATTERN))
1430 if (pattern->pattern.tag_regex)
/* Compiled regexes are released with pcre2_code_free() or with regfree() followed by freez(). */
1433 pcre2_code_free(pattern->pattern.tag_regex);
1435 regfree(pattern->pattern.tag_regex);
1436 freez(pattern->pattern.tag_regex);
1442 #ifdef FEATURE_PCRE_HOST_PATTERNS
1443 if (pattern->pattern.url_spec.host_regex)
1446 pcre2_code_free(pattern->pattern.url_spec.host_regex);
1448 regfree(pattern->pattern.url_spec.host_regex);
1449 freez(pattern->pattern.url_spec.host_regex);
1452 #endif /* def FEATURE_PCRE_HOST_PATTERNS */
1453 freez(pattern->pattern.url_spec.dbuffer);
1454 freez(pattern->pattern.url_spec.dvec);
1455 pattern->pattern.url_spec.dcount = 0;
1456 freez(pattern->pattern.url_spec.port_list);
1457 if (pattern->pattern.url_spec.preg)
1460 pcre2_code_free(pattern->pattern.url_spec.preg);
1462 regfree(pattern->pattern.url_spec.preg);
1463 freez(pattern->pattern.url_spec.preg);
1469 /*********************************************************************
1471 * Function : port_matches
1473 * Description : Compares a port against a port list.
* A NULL port list matches every port.
1476 * 1 : port = The port to check.
1477 * 2 : port_list = The list of port to compare with.
1479 * Returns : TRUE for yes, FALSE otherwise.
1481 *********************************************************************/
1482 static int port_matches(const int port, const char *port_list)
1484 return ((NULL == port_list) || match_portlist(port_list, port));
1488 /*********************************************************************
1490 * Function : host_matches
1492 * Description : Compares a host against a host pattern.
* A pattern without host part (NULL host_regex or
* NULL dbuffer) matches every host.
1495 * 1 : http = The request whose host is matched
1496 * 2 : pattern = The URL pattern
1498 * Returns : TRUE for yes, FALSE otherwise.
1500 *********************************************************************/
1501 static int host_matches(const struct http_request *http,
1502 const struct pattern_spec *pattern)
1504 assert(http->host != NULL);
1505 #ifdef FEATURE_PCRE_HOST_PATTERNS
1506 if (pattern->pattern.url_spec.host_regex_type == PCRE_HOST_PATTERN)
1508 return ((NULL == pattern->pattern.url_spec.host_regex)
1509 || regex_matches(pattern->pattern.url_spec.host_regex, http->host));
/* domain_match() returns 0 if the domains are equivalent. */
1512 return ((NULL == pattern->pattern.url_spec.dbuffer) || (0 == domain_match(pattern, http)));
1516 /*********************************************************************
1518 * Function : path_matches
1520 * Description : Compares a path against a path pattern.
* A pattern without compiled path regex (NULL preg)
* matches every path.
1523 * 1 : path = The path to match
1524 * 2 : pattern = The URL pattern
1526 * Returns : TRUE for yes, FALSE otherwise.
1528 *********************************************************************/
1529 static int path_matches(const char *path, const struct pattern_spec *pattern)
1531 return ((NULL == pattern->pattern.url_spec.preg)
1532 || regex_matches(pattern->pattern.url_spec.preg, path));
1536 /*********************************************************************
1538 * Function : url_match
1540 * Description : Compare a URL against a URL pattern.
* Port, host and path all have to match.
1543 * 1 : pattern = a URL pattern
1544 * 2 : http = parsed request whose URL is matched
1546 * Returns : Nonzero if the URL matches the pattern, else 0.
1548 *********************************************************************/
1549 int url_match(const struct pattern_spec *pattern,
1550 const struct http_request *http)
1552 if (!(pattern->flags & PATTERN_SPEC_URL_PATTERN))
1554 /* It's not an URL pattern and thus shouldn't be matched against URLs */
/* The checks are chained with &&, so evaluation stops at the first part that does not match. */
1558 return (port_matches(http->port, pattern->pattern.url_spec.port_list)
1559 && host_matches(http, pattern) && path_matches(http->path, pattern));
1564 /*********************************************************************
1566 * Function : match_portlist
1568 * Description : Check if a given number is covered by a comma
1569 * separated list of numbers and ranges (a,b-c,d,..)
* The items are converted with atoi(), so there is no
* error reporting for items that are not numbers.
1572 * 1 : portlist = String with list
1573 * 2 : port = port to check
1575 * Returns : 0 => no match
* nonzero => match
1578 *********************************************************************/
1579 int match_portlist(const char *portlist, int port)
1581 char *min, *max, *next, *portlist_copy;
/* The list is parsed in a private copy which is freed again before returning. */
1583 min = portlist_copy = strdup_or_die(portlist);
1586 * Zero-terminate first item and remember offset for next
1588 if (NULL != (next = strchr(portlist_copy, (int) ',')))
1594 * Loop through all items, checking for match
1598 if (NULL == (max = strchr(min, (int) '-')))
1601 * No dash, check for equality
1603 if (port == atoi(min))
1605 freez(portlist_copy);
1612 * This is a range, so check if between min and max,
1613 * or, if max was omitted, between min and 65K
/* An upper bound for which atoi() yields 0 is replaced by 65535. */
1616 if (port >= atoi(min) && port <= (atoi(max) ? atoi(max) : 65535))
1618 freez(portlist_copy);
1630 * Zero-terminate next item and remember offset for n+1
1632 if ((NULL != next) && (NULL != (next = strchr(next, (int) ','))))
1638 freez(portlist_copy);
1644 /*********************************************************************
1646 * Function : parse_forwarder_address
1648 * Description : Parse out the username, password, host and port from
1649 * a forwarder address.
* Numeric IPv6 addresses have to be put in brackets,
* the brackets are removed from the returned hostname.
1652 * 1 : address = The forwarder address to parse.
1653 * 2 : hostname = Used to return the hostname. NULL on error.
1654 * 3 : port = Used to return the port. Untouched if no port
* is specified.
1656 * 4 : username = Used to return the username if any.
1657 * 5 : password = Used to return the password if any.
* User name and password are only looked for if both
* username and password are non-NULL.
1659 * Returns : JB_ERR_OK on success
1660 * JB_ERR_MEMORY on out of memory
1661 * JB_ERR_PARSE on malformed address.
1663 *********************************************************************/
1664 jb_err parse_forwarder_address(char *address, char **hostname, int *port,
1665 char **username, char **password)
/* All parsing is done on a copy, address itself is not modified. */
1670 tmp = *hostname = strdup_or_die(address);
1672 /* Parse username and password */
1673 if (username && password && (NULL != (p = strchr(*hostname, '@'))))
1676 *username = strdup_or_die(*hostname);
1677 *hostname = strdup_or_die(p);
1679 if (NULL != (p = strchr(*username, ':')))
1682 *password = strdup_or_die(p);
1687 /* Parse hostname and port */
/* An opening bracket without a closing one is rejected. */
1689 if ((*p == '[') && (NULL == strchr(p, ']')))
1691 /* XXX: Should do some more validity checks here. */
1692 return JB_ERR_PARSE;
1695 if ((**hostname == '[') && (NULL != (p = strchr(*hostname, ']'))))
/* Shift the address one byte to the left to get rid of the opening bracket. */
1698 memmove(*hostname, (*hostname + 1), (size_t)(p - *hostname));
/* Base 0 lets strtol() also accept octal (leading 0) and hexadecimal (leading 0x) port numbers. */
1701 *port = (int)strtol(++p, NULL, 0);
1704 else if (NULL != (p = strchr(*hostname, ':')))
1707 *port = (int)strtol(p, NULL, 0);