X-Git-Url: http://www.privoxy.org/gitweb/?p=privoxy.git;a=blobdiff_plain;f=urlmatch.c;h=8c8506f2da7af27d63aa4b73a432633477189caf;hp=c48fd38569fda9cdb47e973689eaa14e5ce54399;hb=9e48812c17c8eed95b5dc4ea56ab0ff201edf790;hpb=50f87c9446b435d47c371f63615260636639f450 diff --git a/urlmatch.c b/urlmatch.c index c48fd385..8c8506f2 100644 --- a/urlmatch.c +++ b/urlmatch.c @@ -1,4 +1,4 @@ -const char urlmatch_rcs[] = "$Id: urlmatch.c,v 1.68 2012/03/09 16:23:50 fabiankeil Exp $"; +const char urlmatch_rcs[] = "$Id: urlmatch.c,v 1.77 2013/11/24 14:24:18 fabiankeil Exp $"; /********************************************************************* * * File : $Source: /cvsroot/ijbswa/current/urlmatch.c,v $ @@ -65,7 +65,7 @@ enum regex_anchoring RIGHT_ANCHORED, RIGHT_ANCHORED_HOST }; -static jb_err compile_host_pattern(struct url_spec *url, const char *host_pattern); +static jb_err compile_host_pattern(struct pattern_spec *url, const char *host_pattern); /********************************************************************* * @@ -115,7 +115,6 @@ void free_http_request(struct http_request *http) * 1 : http = pointer to the http structure to hold elements. * * Returns : JB_ERR_OK on success - * JB_ERR_MEMORY on out of memory * JB_ERR_PARSE on malformed command/URL * or >100 domains deep. * @@ -126,11 +125,7 @@ jb_err init_domain_components(struct http_request *http) size_t size; char *p; - http->dbuffer = strdup(http->host); - if (NULL == http->dbuffer) - { - return JB_ERR_MEMORY; - } + http->dbuffer = strdup_or_die(http->host); /* map to lower case */ for (p = http->dbuffer; *p ; p++) @@ -139,7 +134,7 @@ jb_err init_domain_components(struct http_request *http) } /* split the domain name into components */ - http->dcount = ssplit(http->dbuffer, ".", vec, SZ(vec), 1, 1); + http->dcount = ssplit(http->dbuffer, ".", vec, SZ(vec)); if (http->dcount <= 0) { @@ -154,11 +149,7 @@ jb_err init_domain_components(struct http_request *http) /* save a copy of the pointers in dvec */ size = (size_t)http->dcount * sizeof(*http->dvec); - http->dvec = (char **)malloc(size); - if (NULL == http->dvec) - { - return JB_ERR_MEMORY; - } + http->dvec = malloc_or_die(size); memcpy(http->dvec, vec, size); @@ -229,7 +220,6 @@ int url_requires_percent_encoding(const char *url) * protocol are acceptable. * * Returns : JB_ERR_OK on success - * JB_ERR_MEMORY on out of memory * JB_ERR_PARSE on malformed command/URL * or >100 domains deep. * @@ -241,23 +231,15 @@ jb_err parse_http_url(const char *url, struct http_request *http, int require_pr /* * Save our initial URL */ - http->url = strdup(url); - if (http->url == NULL) - { - return JB_ERR_MEMORY; - } - + http->url = strdup_or_die(url); /* * Check for * URI. If found, we're done. */ if (*http->url == '*') { - if (NULL == (http->path = strdup("*")) - || NULL == (http->hostport = strdup(""))) - { - return JB_ERR_MEMORY; - } + http->path = strdup_or_die("*"); + http->hostport = strdup_or_die(""); if (http->url[1] != '\0') { return JB_ERR_PARSE; @@ -274,11 +256,7 @@ jb_err parse_http_url(const char *url, struct http_request *http, int require_pr char *url_noproto; char *url_path; - buf = strdup(url); - if (buf == NULL) - { - return JB_ERR_MEMORY; - } + buf = strdup_or_die(url); /* Find the start of the URL in our scratch space */ url_noproto = buf; @@ -321,9 +299,9 @@ jb_err parse_http_url(const char *url, struct http_request *http, int require_pr * https URL in and it's parsed by the function. (When the * URL is actually retrieved, SSL hides the path part). */ - http->path = strdup(http->ssl ? "/" : url_path); + http->path = strdup_or_die(http->ssl ? "/" : url_path); *url_path = '\0'; - http->hostport = strdup(url_noproto); + http->hostport = strdup_or_die(url_noproto); } else { @@ -331,17 +309,11 @@ jb_err parse_http_url(const char *url, struct http_request *http, int require_pr * Repair broken HTTP requests that don't contain a path, * or CONNECT requests */ - http->path = strdup("/"); - http->hostport = strdup(url_noproto); + http->path = strdup_or_die("/"); + http->hostport = strdup_or_die(url_noproto); } freez(buf); - - if ((http->path == NULL) - || (http->hostport == NULL)) - { - return JB_ERR_MEMORY; - } } if (!host_available) @@ -358,11 +330,7 @@ jb_err parse_http_url(const char *url, struct http_request *http, int require_pr char *host; char *port; - buf = strdup(http->hostport); - if (buf == NULL) - { - return JB_ERR_MEMORY; - } + buf = strdup_or_die(http->hostport); /* check if url contains username and/or password */ host = strchr(buf, '@'); @@ -414,9 +382,18 @@ jb_err parse_http_url(const char *url, struct http_request *http, int require_pr if (port != NULL) { /* Contains port */ + char *endptr; + long parsed_port; /* Terminate hostname and point to start of port string */ *port++ = '\0'; - http->port = atoi(port); + parsed_port = strtol(port, &endptr, 10); + if ((parsed_port <= 0) || (parsed_port > 65535) || (*endptr != '\0')) + { + log_error(LOG_LEVEL_ERROR, "Invalid port in URL: %s.", url); + freez(buf); + return JB_ERR_PARSE; + } + http->port = (int)parsed_port; } else { @@ -424,14 +401,9 @@ jb_err parse_http_url(const char *url, struct http_request *http, int require_pr http->port = (http->ssl ? 443 : 80); } - http->host = strdup(host); + http->host = strdup_or_die(host); freez(buf); - - if (http->host == NULL) - { - return JB_ERR_MEMORY; - } } #ifdef FEATURE_EXTENDED_HOST_PATTERNS @@ -510,7 +482,6 @@ static int unknown_method(const char *method) * 2 : http = pointer to the http structure to hold elements * * Returns : JB_ERR_OK on success - * JB_ERR_MEMORY on out of memory * JB_ERR_CGI_PARAMS on malformed command/URL * or >100 domains deep. * @@ -524,13 +495,9 @@ jb_err parse_http_request(const char *req, struct http_request *http) memset(http, '\0', sizeof(*http)); - buf = strdup(req); - if (buf == NULL) - { - return JB_ERR_MEMORY; - } + buf = strdup_or_die(req); - n = ssplit(buf, " \r\n", v, SZ(v), 1, 1); + n = ssplit(buf, " \r\n", v, SZ(v)); if (n != 3) { freez(buf); @@ -573,19 +540,12 @@ jb_err parse_http_request(const char *req, struct http_request *http) /* * Copy the details into the structure */ - http->cmd = strdup(req); - http->gpc = strdup(v[0]); - http->ver = strdup(v[2]); + http->cmd = strdup_or_die(req); + http->gpc = strdup_or_die(v[0]); + http->ver = strdup_or_die(v[2]); freez(buf); - if ((http->cmd == NULL) - || (http->gpc == NULL) - || (http->ver == NULL)) - { - return JB_ERR_MEMORY; - } - return JB_ERR_OK; } @@ -613,7 +573,7 @@ jb_err parse_http_request(const char *req, struct http_request *http) * *********************************************************************/ static jb_err compile_pattern(const char *pattern, enum regex_anchoring anchoring, - struct url_spec *url, regex_t **regex) + struct pattern_spec *url, regex_t **regex) { int errcode; char rebuf[BUFFER_SIZE]; @@ -650,7 +610,7 @@ static jb_err compile_pattern(const char *pattern, enum regex_anchoring anchorin *regex = zalloc(sizeof(**regex)); if (NULL == *regex) { - free_url_spec(url); + free_pattern_spec(url); return JB_ERR_MEMORY; } @@ -668,7 +628,7 @@ static jb_err compile_pattern(const char *pattern, enum regex_anchoring anchorin rebuf[errlen] = '\0'; log_error(LOG_LEVEL_ERROR, "error compiling %s from %s: %s", pattern, url->spec, rebuf); - free_url_spec(url); + free_pattern_spec(url); return JB_ERR_PARSE; } @@ -685,7 +645,7 @@ static jb_err compile_pattern(const char *pattern, enum regex_anchoring anchorin * Description : Compiles the three parts of an URL pattern. * * Parameters : - * 1 : url = Target url_spec to be filled in. + * 1 : url = Target pattern_spec to be filled in. * 2 : buf = The url pattern to compile. Will be messed up. * * Returns : JB_ERR_OK - Success @@ -693,7 +653,7 @@ static jb_err compile_pattern(const char *pattern, enum regex_anchoring anchorin * JB_ERR_PARSE - Cannot parse regex * *********************************************************************/ -static jb_err compile_url_pattern(struct url_spec *url, char *buf) +static jb_err compile_url_pattern(struct pattern_spec *url, char *buf) { char *p; @@ -709,7 +669,7 @@ static jb_err compile_url_pattern(struct url_spec *url, char *buf) /* * XXX: does it make sense to compile the slash at the beginning? */ - jb_err err = compile_pattern(p, LEFT_ANCHORED, url, &url->preg); + jb_err err = compile_pattern(p, LEFT_ANCHORED, url, &url->pattern.url_spec.preg); if (JB_ERR_OK != err) { @@ -749,15 +709,11 @@ static jb_err compile_url_pattern(struct url_spec *url, char *buf) if (NULL != p) { *p++ = '\0'; - url->port_list = strdup(p); - if (NULL == url->port_list) - { - return JB_ERR_MEMORY; - } + url->pattern.url_spec.port_list = strdup_or_die(p); } else { - url->port_list = NULL; + url->pattern.url_spec.port_list = NULL; } if (buf[0] != '\0') @@ -778,7 +734,7 @@ static jb_err compile_url_pattern(struct url_spec *url, char *buf) * Description : Parses and compiles a host pattern. * * Parameters : - * 1 : url = Target url_spec to be filled in. + * 1 : url = Target pattern_spec to be filled in. * 2 : host_pattern = Host pattern to compile. * * Returns : JB_ERR_OK - Success @@ -786,9 +742,9 @@ static jb_err compile_url_pattern(struct url_spec *url, char *buf) * JB_ERR_PARSE - Cannot parse regex * *********************************************************************/ -static jb_err compile_host_pattern(struct url_spec *url, const char *host_pattern) +static jb_err compile_host_pattern(struct pattern_spec *url, const char *host_pattern) { - return compile_pattern(host_pattern, RIGHT_ANCHORED_HOST, url, &url->host_regex); + return compile_pattern(host_pattern, RIGHT_ANCHORED_HOST, url, &url->pattern.url_spec.host_regex); } #else @@ -800,15 +756,14 @@ static jb_err compile_host_pattern(struct url_spec *url, const char *host_patter * Description : Parses and "compiles" an old-school host pattern. * * Parameters : - * 1 : url = Target url_spec to be filled in. + * 1 : url = Target pattern_spec to be filled in. * 2 : host_pattern = Host pattern to parse. * * Returns : JB_ERR_OK - Success - * JB_ERR_MEMORY - Out of memory * JB_ERR_PARSE - Cannot parse regex * *********************************************************************/ -static jb_err compile_host_pattern(struct url_spec *url, const char *host_pattern) +static jb_err compile_host_pattern(struct pattern_spec *url, const char *host_pattern) { char *v[150]; size_t size; @@ -829,12 +784,7 @@ static jb_err compile_host_pattern(struct url_spec *url, const char *host_patter /* * Split domain into components */ - url->dbuffer = strdup(host_pattern); - if (NULL == url->dbuffer) - { - free_url_spec(url); - return JB_ERR_MEMORY; - } + url->dbuffer = strdup_or_die(host_pattern); /* * Map to lower case @@ -847,12 +797,12 @@ static jb_err compile_host_pattern(struct url_spec *url, const char *host_patter /* * Split the domain name into components */ - url->dcount = ssplit(url->dbuffer, ".", v, SZ(v), 1, 1); + url->dcount = ssplit(url->dbuffer, ".", v, SZ(v)); if (url->dcount < 0) { - free_url_spec(url); - return JB_ERR_MEMORY; + free_pattern_spec(url); + return JB_ERR_PARSE; } else if (url->dcount != 0) { @@ -861,12 +811,7 @@ static jb_err compile_host_pattern(struct url_spec *url, const char *host_patter */ size = (size_t)url->dcount * sizeof(*url->dvec); - url->dvec = (char **)malloc(size); - if (NULL == url->dvec) - { - free_url_spec(url); - return JB_ERR_MEMORY; - } + url->dvec = malloc_or_die(size); memcpy(url->dvec, v, size); } @@ -972,8 +917,8 @@ static int simplematch(const char *pattern, const char *text) * Char match, or char range match? */ if ((*pat == *txt) - || (*pat == '?') - || ((*pat == ']') && (charmap[*txt / 8] & (1 << (*txt % 8))))) + || (*pat == '?') + || ((*pat == ']') && (charmap[*txt / 8] & (1 << (*txt % 8))))) { /* * Success: Go ahead @@ -1009,7 +954,7 @@ static int simplematch(const char *pattern, const char *text) } /* Cut off extra '*'s */ - if (*pat == '*') pat++; + if (*pat == '*') pat++; /* If this is the pattern's end, fine! */ return(*pat); @@ -1070,7 +1015,7 @@ static int simple_domaincmp(char **pv, char **fv, int len) * Returns : 0 => domains are equivalent, else no match. * *********************************************************************/ -static int domain_match(const struct url_spec *pattern, const struct http_request *fqdn) +static int domain_match(const struct pattern_spec *pattern, const struct http_request *fqdn) { char **pv, **fv; /* vectors */ int plen, flen; @@ -1139,93 +1084,106 @@ static int domain_match(const struct url_spec *pattern, const struct http_reques /********************************************************************* * - * Function : create_url_spec + * Function : create_pattern_spec * - * Description : Creates a "url_spec" structure from a string. - * When finished, free with free_url_spec(). + * Description : Creates a "pattern_spec" structure from a string. + * When finished, free with free_pattern_spec(). * * Parameters : - * 1 : url = Target url_spec to be filled in. Will be - * zeroed before use. + * 1 : pattern = Target pattern_spec to be filled in. + * Will be zeroed before use. * 2 : buf = Source pattern, null terminated. NOTE: The * contents of this buffer are destroyed by this * function. If this function succeeds, the - * buffer is copied to url->spec. If this + * buffer is copied to pattern->spec. If this * function fails, the contents of the buffer * are lost forever. * * Returns : JB_ERR_OK - Success - * JB_ERR_MEMORY - Out of memory * JB_ERR_PARSE - Cannot parse regex (Detailed message * written to system log) * *********************************************************************/ -jb_err create_url_spec(struct url_spec *url, char *buf) +jb_err create_pattern_spec(struct pattern_spec *pattern, char *buf) { - assert(url); + assert(pattern); assert(buf); - memset(url, '\0', sizeof(*url)); + memset(pattern, '\0', sizeof(*pattern)); /* Remember the original specification for the CGI pages. */ - url->spec = strdup(buf); - if (NULL == url->spec) - { - return JB_ERR_MEMORY; - } + pattern->spec = strdup_or_die(buf); - /* Is it a tag pattern? */ - if (0 == strncmpic(url->spec, "TAG:", 4)) + /* Is it a positive tag pattern? */ + if (0 == strncmpic(pattern->spec, "TAG:", 4)) { /* The pattern starts with the first character after "TAG:" */ const char *tag_pattern = buf + 4; - return compile_pattern(tag_pattern, NO_ANCHORING, url, &url->tag_regex); + pattern->flags |= PATTERN_SPEC_TAG_PATTERN; + return compile_pattern(tag_pattern, NO_ANCHORING, pattern, &pattern->pattern.tag_regex); + } + /* Is it a negative tag pattern? */ + if (0 == strncmpic(pattern->spec, "NO-REQUEST-TAG:", 15)) + { + /* The pattern starts with the first character after "NO-REQUEST-TAG:" */ + const char *tag_pattern = buf + 15; + pattern->flags |= PATTERN_SPEC_NO_REQUEST_TAG_PATTERN; + return compile_pattern(tag_pattern, NO_ANCHORING, pattern, &pattern->pattern.tag_regex); + } + if (0 == strncmpic(pattern->spec, "NO-RESPONSE-TAG:", 16)) + { + /* The pattern starts with the first character after "NO-RESPONSE-TAG:" */ + const char *tag_pattern = buf + 16; + pattern->flags |= PATTERN_SPEC_NO_RESPONSE_TAG_PATTERN; + return compile_pattern(tag_pattern, NO_ANCHORING, pattern, &pattern->pattern.tag_regex); } + pattern->flags |= PATTERN_SPEC_URL_PATTERN; + /* If it isn't a tag pattern it must be an URL pattern. */ - return compile_url_pattern(url, buf); + return compile_url_pattern(pattern, buf); } /********************************************************************* * - * Function : free_url_spec + * Function : free_pattern_spec * - * Description : Called from the "unloaders". Freez the url + * Description : Called from the "unloaders". Freez the pattern * structure elements. * * Parameters : - * 1 : url = pointer to a url_spec structure. + * 1 : pattern = pointer to a pattern_spec structure. * * Returns : N/A * *********************************************************************/ -void free_url_spec(struct url_spec *url) +void free_pattern_spec(struct pattern_spec *pattern) { - if (url == NULL) return; + if (pattern == NULL) return; - freez(url->spec); + freez(pattern->spec); #ifdef FEATURE_EXTENDED_HOST_PATTERNS - if (url->host_regex) + if (pattern->pattern.url_spec.host_regex) { - regfree(url->host_regex); - freez(url->host_regex); + regfree(pattern->pattern.url_spec.host_regex); + freez(pattern->pattern.url_spec.host_regex); } #else - freez(url->dbuffer); - freez(url->dvec); - url->dcount = 0; + freez(pattern->pattern.url_spec.dbuffer); + freez(pattern->pattern.url_spec.dvec); + pattern->pattern.url_spec.dcount = 0; #endif /* ndef FEATURE_EXTENDED_HOST_PATTERNS */ - freez(url->port_list); - if (url->preg) + freez(pattern->pattern.url_spec.port_list); + if (pattern->pattern.url_spec.preg) { - regfree(url->preg); - freez(url->preg); + regfree(pattern->pattern.url_spec.preg); + freez(pattern->pattern.url_spec.preg); } - if (url->tag_regex) + if (pattern->pattern.tag_regex) { - regfree(url->tag_regex); - freez(url->tag_regex); + regfree(pattern->pattern.tag_regex); + freez(pattern->pattern.tag_regex); } } @@ -1263,13 +1221,13 @@ static int port_matches(const int port, const char *port_list) * *********************************************************************/ static int host_matches(const struct http_request *http, - const struct url_spec *pattern) + const struct pattern_spec *pattern) { #ifdef FEATURE_EXTENDED_HOST_PATTERNS - return ((NULL == pattern->host_regex) - || (0 == regexec(pattern->host_regex, http->host, 0, NULL, 0))); + return ((NULL == pattern->pattern.url_spec.host_regex) + || (0 == regexec(pattern->pattern.url_spec.host_regex, http->host, 0, NULL, 0))); #else - return ((NULL == pattern->dbuffer) || (0 == domain_match(pattern, http))); + return ((NULL == pattern->pattern.url_spec.dbuffer) || (0 == domain_match(pattern, http))); #endif } @@ -1287,10 +1245,10 @@ static int host_matches(const struct http_request *http, * Returns : TRUE for yes, FALSE otherwise. * *********************************************************************/ -static int path_matches(const char *path, const struct url_spec *pattern) +static int path_matches(const char *path, const struct pattern_spec *pattern) { - return ((NULL == pattern->preg) - || (0 == regexec(pattern->preg, path, 0, NULL, 0))); + return ((NULL == pattern->pattern.url_spec.preg) + || (0 == regexec(pattern->pattern.url_spec.preg, path, 0, NULL, 0))); } @@ -1307,16 +1265,16 @@ static int path_matches(const char *path, const struct url_spec *pattern) * Returns : Nonzero if the URL matches the pattern, else 0. * *********************************************************************/ -int url_match(const struct url_spec *pattern, +int url_match(const struct pattern_spec *pattern, const struct http_request *http) { - if (pattern->tag_regex != NULL) + if (!(pattern->flags & PATTERN_SPEC_URL_PATTERN)) { - /* It's a tag pattern and shouldn't be matched against URLs */ + /* It's not an URL pattern and thus shouldn't be matched against URLs */ return 0; } - return (port_matches(http->port, pattern->port_list) + return (port_matches(http->port, pattern->pattern.url_spec.port_list) && host_matches(http, pattern) && path_matches(http->path, pattern)); } @@ -1341,7 +1299,7 @@ int match_portlist(const char *portlist, int port) { char *min, *max, *next, *portlist_copy; - min = portlist_copy = strdup(portlist); + min = portlist_copy = strdup_or_die(portlist); /* * Zero-terminate first item and remember offset for next @@ -1429,11 +1387,7 @@ jb_err parse_forwarder_address(char *address, char **hostname, int *port) return JB_ERR_PARSE; } - *hostname = strdup(address); - if (NULL == *hostname) - { - return JB_ERR_MEMORY; - } + *hostname = strdup_or_die(address); if ((**hostname == '[') && (NULL != (p = strchr(*hostname, ']')))) {