X-Git-Url: http://www.privoxy.org/gitweb/show-status?a=blobdiff_plain;f=urlmatch.c;h=84e9d29858edb1b1dcc3ff71d7a8034a73edbf58;hb=1603ca22d9a21bc6f0a181994e6971fd62cd3697;hp=6949eedb82d2716f306ba8c0b73a2d4576813433;hpb=b40487eec42668166a0dd9feb10626a03faea635;p=privoxy.git diff --git a/urlmatch.c b/urlmatch.c index 6949eedb..84e9d298 100644 --- a/urlmatch.c +++ b/urlmatch.c @@ -5,7 +5,7 @@ * Purpose : Declares functions to match URLs against URL * patterns. * - * Copyright : Written by and Copyright (C) 2001-2014 + * Copyright : Written by and Copyright (C) 2001-2020 * the Privoxy team. https://www.privoxy.org/ * * Based on the Internet Junkbuster originally written @@ -45,7 +45,7 @@ #include #include -#if !defined(_WIN32) && !defined(__OS2__) +#if !defined(_WIN32) #include #endif @@ -62,7 +62,10 @@ enum regex_anchoring RIGHT_ANCHORED, RIGHT_ANCHORED_HOST }; -static jb_err compile_host_pattern(struct pattern_spec *url, const char *host_pattern); +static jb_err compile_vanilla_host_pattern(struct pattern_spec *url, const char *host_pattern); +#ifdef FEATURE_PCRE_HOST_PATTERNS +static jb_err compile_pcre_host_pattern(struct pattern_spec *url, const char *host_pattern); +#endif /********************************************************************* * @@ -89,15 +92,12 @@ void free_http_request(struct http_request *http) freez(http->path); freez(http->version); freez(http->host_ip_addr_str); -#ifndef FEATURE_EXTENDED_HOST_PATTERNS freez(http->dbuffer); freez(http->dvec); http->dcount = 0; -#endif } -#ifndef FEATURE_EXTENDED_HOST_PATTERNS /********************************************************************* * * Function : init_domain_components @@ -105,7 +105,7 @@ void free_http_request(struct http_request *http) * Description : Splits the domain name so we can compare it * against wildcards. 
It used to be part of * parse_http_url, but was separated because the - * same code is required in chat in case of + * same code is required in chat() in case of * intercepted requests. * * Parameters : @@ -152,7 +152,6 @@ jb_err init_domain_components(struct http_request *http) return JB_ERR_OK; } -#endif /* ndef FEATURE_EXTENDED_HOST_PATTERNS */ /********************************************************************* @@ -246,7 +245,7 @@ jb_err parse_http_url(const char *url, struct http_request *http, int require_pr /* - * Split URL into protocol,hostport,path. + * Split URL into protocol, hostport, path. */ { char *buf; @@ -264,7 +263,9 @@ jb_err parse_http_url(const char *url, struct http_request *http, int require_pr else if (strncmpic(url_noproto, "https://", 8) == 0) { /* - * Should only happen when called from cgi_show_url_info(). + * Should only happen when called from cgi_show_url_info() + * or when the request was https-inspected and the request + * line got rewritten. */ url_noproto += 8; http->ssl = 1; @@ -303,7 +304,7 @@ jb_err parse_http_url(const char *url, struct http_request *http, int require_pr url_path ); *url_path = '\0'; - http->hostport = strdup_or_die(url_noproto); + http->hostport = string_tolower(url_noproto); } else { @@ -312,10 +313,15 @@ jb_err parse_http_url(const char *url, struct http_request *http, int require_pr * or CONNECT requests */ http->path = strdup_or_die("/"); - http->hostport = strdup_or_die(url_noproto); + http->hostport = string_tolower(url_noproto); } freez(buf); + + if (http->hostport == NULL) + { + return JB_ERR_PARSE; + } } if (!host_available) @@ -408,12 +414,8 @@ jb_err parse_http_url(const char *url, struct http_request *http, int require_pr freez(buf); } -#ifdef FEATURE_EXTENDED_HOST_PATTERNS - return JB_ERR_OK; -#else /* Split domain name so we can compare it against wildcards */ return init_domain_components(http); -#endif /* def FEATURE_EXTENDED_HOST_PATTERNS */ } @@ -602,6 +604,100 @@ jb_err 
parse_http_request(const char *req, struct http_request *http)
 }


+#ifdef HAVE_PCRE2
+/*********************************************************************
+ *
+ * Function    :  compile_pattern
+ *
+ * Description :  Compiles a host, domain or TAG pattern with pcre2.
+ *
+ * Parameters  :
+ *          1  :  pattern = The pattern to compile.
+ *          2  :  anchoring = How the regex should be modified
+ *                            before compilation. Can be either
+ *                            one of NO_ANCHORING, LEFT_ANCHORED,
+ *                            RIGHT_ANCHORED or RIGHT_ANCHORED_HOST.
+ *          3  :  url     = Pattern spec the pattern belongs to; its
+ *                          spec member is logged in case of failures.
+ *          4  :  regex     = Where the compiled regex should be stored.
+ *
+ * Returns     :  JB_ERR_OK - Success (regex is NULL for an empty pattern)
+ *                JB_ERR_PARSE - Cannot compile or JIT-compile the regex
+ *
+ *********************************************************************/
+static jb_err compile_pattern(const char *pattern, enum regex_anchoring anchoring,
+                              struct pattern_spec *url, pcre2_code **regex)
+{
+   int errcode;
+   const char *fmt = NULL;
+   char *rebuf;
+   size_t rebuf_size;
+   PCRE2_SIZE error_offset;
+   int ret;
+
+   assert(pattern);
+
+   if (pattern[0] == '\0')
+   {
+      *regex = NULL;
+      return JB_ERR_OK;
+   }
+
+   switch (anchoring)
+   {
+      case NO_ANCHORING:
+         fmt = "%s";
+         break;
+      case RIGHT_ANCHORED:
+         fmt = "%s$";
+         break;
+      case RIGHT_ANCHORED_HOST:
+         fmt = "%s\\.?$";
+         break;
+      case LEFT_ANCHORED:
+         fmt = "^%s";
+         break;
+      default:
+         log_error(LOG_LEVEL_FATAL,
+            "Invalid anchoring in compile_pattern %d", anchoring);
+   }
+   rebuf_size = strlen(pattern) + strlen(fmt);
+   rebuf = malloc_or_die(rebuf_size);
+
+   snprintf(rebuf, rebuf_size, fmt, pattern);
+   /* Compile the anchored copy in rebuf, not the raw pattern. */
+   *regex = pcre2_compile((const unsigned char *)rebuf,
+      PCRE2_ZERO_TERMINATED, PCRE2_CASELESS, &errcode,
+      &error_offset, NULL);
+   if (*regex == NULL)
+   {
+      log_error(LOG_LEVEL_ERROR, "error compiling %s from %s: %s",
+         pattern, url->spec, rebuf);
+      freez(rebuf);
+
+      return JB_ERR_PARSE;
+   }
+
+#ifndef DISABLE_PCRE_JIT_COMPILATION
+   /* Try to enable JIT compilation but continue if it's unsupported. */
+   if ((ret = pcre2_jit_compile(*regex, PCRE2_JIT_COMPLETE)) &&
+       (ret != PCRE2_ERROR_JIT_BADOPTION))
+   {
+      log_error(LOG_LEVEL_ERROR,
+         "Unexpected error enabling JIT compilation for %s from %s: %s",
+         pattern, url->spec, rebuf);
+      freez(rebuf);
+
+      return JB_ERR_PARSE;
+   }
+#endif
+
+   freez(rebuf);
+
+   return JB_ERR_OK;
+
+}
+#else
 /*********************************************************************
  *
  * Function    :  compile_pattern
@@ -684,6 +780,7 @@ static jb_err compile_pattern(const char *pattern, enum regex_anchoring anchorin
    return JB_ERR_OK;

 }
+#endif


 /*********************************************************************
@@ -704,6 +801,36 @@ static jb_err compile_pattern(const char *pattern, enum regex_anchoring anchorin
 static jb_err compile_url_pattern(struct pattern_spec *url, char *buf)
 {
    char *p;
+   const size_t prefix_length = 18;
+
+#ifdef FEATURE_PCRE_HOST_PATTERNS
+   if (strncmpic(buf, "PCRE-HOST-PATTERN:", prefix_length) == 0)
+   {
+      url->pattern.url_spec.host_regex_type = PCRE_HOST_PATTERN;
+      /* Overwrite the "PCRE-HOST-PATTERN:" prefix */
+      memmove(buf, buf+prefix_length, strlen(buf+prefix_length)+1);
+   }
+   else
+   {
+      url->pattern.url_spec.host_regex_type = VANILLA_HOST_PATTERN;
+   }
+#else
+   if (strncmpic(buf, "PCRE-HOST-PATTERN:", prefix_length) == 0)
+   {
+      log_error(LOG_LEVEL_ERROR,
+         "PCRE-HOST-PATTERN detected while Privoxy has been compiled "
+         "without FEATURE_PCRE_HOST_PATTERNS: %s",
+         buf);
+      /* Overwrite the "PCRE-HOST-PATTERN:" prefix */
+      memmove(buf, buf+prefix_length, strlen(buf+prefix_length)+1);
+      /*
+       * The pattern will probably not work as expected.
+       * We don't simply return JB_ERR_PARSE here so the
+       * regression tests can be loaded with and without
+       * FEATURE_PCRE_HOST_PATTERNS.
+ */ + } +#endif p = strchr(buf, '/'); if (NULL != p) @@ -766,7 +893,16 @@ static jb_err compile_url_pattern(struct pattern_spec *url, char *buf) if (buf[0] != '\0') { - return compile_host_pattern(url, buf); +#ifdef FEATURE_PCRE_HOST_PATTERNS + if (url->pattern.url_spec.host_regex_type == PCRE_HOST_PATTERN) + { + return compile_pcre_host_pattern(url, buf); + } + else +#endif + { + return compile_vanilla_host_pattern(url, buf); + } } return JB_ERR_OK; @@ -774,12 +910,12 @@ static jb_err compile_url_pattern(struct pattern_spec *url, char *buf) } -#ifdef FEATURE_EXTENDED_HOST_PATTERNS +#ifdef FEATURE_PCRE_HOST_PATTERNS /********************************************************************* * - * Function : compile_host_pattern + * Function : compile_pcre_host_pattern * - * Description : Parses and compiles a host pattern. + * Description : Parses and compiles a pcre host pattern. * * Parameters : * 1 : url = Target pattern_spec to be filled in. @@ -790,16 +926,16 @@ static jb_err compile_url_pattern(struct pattern_spec *url, char *buf) * JB_ERR_PARSE - Cannot parse regex * *********************************************************************/ -static jb_err compile_host_pattern(struct pattern_spec *url, const char *host_pattern) +static jb_err compile_pcre_host_pattern(struct pattern_spec *url, const char *host_pattern) { return compile_pattern(host_pattern, RIGHT_ANCHORED_HOST, url, &url->pattern.url_spec.host_regex); } +#endif /* def FEATURE_PCRE_HOST_PATTERNS */ -#else /********************************************************************* * - * Function : compile_host_pattern + * Function : compile_vanilla_host_pattern * * Description : Parses and "compiles" an old-school host pattern. 
*
@@ -811,7 +947,7 @@ static jb_err compile_host_pattern(struct pattern_spec *url, const char *host_pa
  *                JB_ERR_PARSE - Cannot parse regex
  *
  *********************************************************************/
-static jb_err compile_host_pattern(struct pattern_spec *url, const char *host_pattern)
+static jb_err compile_vanilla_host_pattern(struct pattern_spec *url, const char *host_pattern)
 {
    char *v[150];
    size_t size;
@@ -1010,6 +1146,49 @@ static int simplematch(const char *pattern, const char *text)
 }


+#ifdef HAVE_PCRE2
+/*********************************************************************
+ *
+ * Function    :  pcre2_pattern_matches
+ *
+ * Description :  Checks if a compiled pcre2 pattern matches a string.
+ *
+ * Parameters  :
+ *          1  :  pattern = The compiled pattern (asserted non-NULL)
+ *          2  :  string = The string to check (asserted non-NULL)
+ *
+ * Returns     :  TRUE if pcre2_match() returned >= 0, FALSE otherwise.
+ *
+ *********************************************************************/
+int pcre2_pattern_matches(const pcre2_code *pattern, const char *string)
+{
+   PCRE2_SIZE offset;
+   int ret;
+   pcre2_match_data *pcre2_matches;
+
+   assert(pattern != NULL);
+   assert(string != NULL);
+   /* Passed to pcre2_match() as the start offset into string. */
+   offset = 0;
+
+   pcre2_matches = pcre2_match_data_create_from_pattern(pattern, NULL);
+   if (NULL == pcre2_matches)
+   {
+      log_error(LOG_LEVEL_ERROR,
+         "Out of memory while matching pattern against %s", string);
+      return FALSE; /* An allocation failure is reported as "no match". */
+   }
+
+   ret = pcre2_match(pattern, (const unsigned char *)string, strlen(string),
+      offset, 0, pcre2_matches, NULL);
+   /* Only the return code is used, so the match data can go right away. */
+   pcre2_match_data_free(pcre2_matches);
+
+   return (ret >= 0);
+}
+#endif
+
+
 /*********************************************************************
  *
  * Function    :  simple_domaincmp
@@ -1127,7 +1306,6 @@ static int domain_match(const struct pattern_spec *p, const struct http_request
    }
 }

-#endif /* def FEATURE_EXTENDED_HOST_PATTERNS */


 /*********************************************************************
@@ -1166,9 +1344,9 @@ jb_err create_pattern_spec(struct pattern_spec *pattern, char *buf)
       const
unsigned flag; } tag_pattern[] = { { "TAG:", 4, PATTERN_SPEC_TAG_PATTERN}, - #ifdef FEATURE_CLIENT_TAGS +#ifdef FEATURE_CLIENT_TAGS { "CLIENT-TAG:", 11, PATTERN_SPEC_CLIENT_TAG_PATTERN}, - #endif +#endif { "NO-REQUEST-TAG:", 15, PATTERN_SPEC_NO_REQUEST_TAG_PATTERN}, { "NO-RESPONSE-TAG:", 16, PATTERN_SPEC_NO_RESPONSE_TAG_PATTERN} }; @@ -1223,27 +1401,44 @@ void free_pattern_spec(struct pattern_spec *pattern) if (pattern == NULL) return; freez(pattern->spec); -#ifdef FEATURE_EXTENDED_HOST_PATTERNS + + if (!(pattern->flags & PATTERN_SPEC_URL_PATTERN)) + { + if (pattern->pattern.tag_regex) + { +#ifdef HAVE_PCRE2 + pcre2_code_free(pattern->pattern.tag_regex); +#else + regfree(pattern->pattern.tag_regex); + freez(pattern->pattern.tag_regex); +#endif + } + return; + } + +#ifdef FEATURE_PCRE_HOST_PATTERNS if (pattern->pattern.url_spec.host_regex) { +#ifdef HAVE_PCRE2 + pcre2_code_free(pattern->pattern.url_spec.host_regex); +#else regfree(pattern->pattern.url_spec.host_regex); freez(pattern->pattern.url_spec.host_regex); +#endif } -#else +#endif /* def FEATURE_PCRE_HOST_PATTERNS */ freez(pattern->pattern.url_spec.dbuffer); freez(pattern->pattern.url_spec.dvec); pattern->pattern.url_spec.dcount = 0; -#endif /* ndef FEATURE_EXTENDED_HOST_PATTERNS */ freez(pattern->pattern.url_spec.port_list); if (pattern->pattern.url_spec.preg) { +#ifdef HAVE_PCRE2 + pcre2_code_free(pattern->pattern.url_spec.preg); +#else regfree(pattern->pattern.url_spec.preg); freez(pattern->pattern.url_spec.preg); - } - if (pattern->pattern.tag_regex) - { - regfree(pattern->pattern.tag_regex); - freez(pattern->pattern.tag_regex); +#endif } } @@ -1284,12 +1479,20 @@ static int host_matches(const struct http_request *http, const struct pattern_spec *pattern) { assert(http->host != NULL); -#ifdef FEATURE_EXTENDED_HOST_PATTERNS - return ((NULL == pattern->pattern.url_spec.host_regex) - || (0 == regexec(pattern->pattern.url_spec.host_regex, http->host, 0, NULL, 0))); +#ifdef FEATURE_PCRE_HOST_PATTERNS + if 
(pattern->pattern.url_spec.host_regex_type == PCRE_HOST_PATTERN) + { + return ((NULL == pattern->pattern.url_spec.host_regex) +#ifdef HAVE_PCRE2 + || pcre2_pattern_matches(pattern->pattern.url_spec.host_regex, + http->host)); #else - return ((NULL == pattern->pattern.url_spec.dbuffer) || (0 == domain_match(pattern, http))); + || (0 == regexec(pattern->pattern.url_spec.host_regex, + http->host, 0, NULL, 0))); #endif + } +#endif + return ((NULL == pattern->pattern.url_spec.dbuffer) || (0 == domain_match(pattern, http))); } @@ -1309,7 +1512,11 @@ static int host_matches(const struct http_request *http, static int path_matches(const char *path, const struct pattern_spec *pattern) { return ((NULL == pattern->pattern.url_spec.preg) +#ifdef HAVE_PCRE2 + || (pcre2_pattern_matches(pattern->pattern.url_spec.preg, path))); +#else || (0 == regexec(pattern->pattern.url_spec.preg, path, 0, NULL, 0))); +#endif }