79) Evaluate pcre alternatives.
-80) Change FEATURE_EXTENDED_HOST_PATTERNS to support both
- extended and vanilla host patterns at the same time.
-
- Note that the requirement is to allow the user to decide
- if the domain pattern should be interpreted as regex or
- traditional host pattern and if it's not obvious that the
- user made any decision, default to the latter.
-
- Possible solutions would be:
-
- 1. An always-use-regex-domain-patterns config option
- 2. An enable-regex-domain-patterns-for-this-action-file option
- 3. An enable-regex-domain-patterns-for-this-action-file-until-the-user-says-otherwise option
- 4. A treat-the-domain-pattern-in-this-line-as-regex(-or-not) option
- 5. Combinations of the options above
-
- With 2+4, 3+4 or 2+3+4 being the preferences until
- further discussion.
-
82) Detect if the system time goes back in time let the user
know if it caused any connections to get closed.
/*
* Allow PCRE syntax in host patterns.
*/
-#undef FEATURE_EXTENDED_HOST_PATTERNS
+#undef FEATURE_PCRE_HOST_PATTERNS
/*
* Allow filtering with scripts and programs.
AC_DEFINE(FEATURE_GRACEFUL_TERMINATION)
fi])
-AC_ARG_ENABLE(extended-host-patterns,
-[ --enable-extended-host-patterns Enable and require PCRE syntax in host patterns. This feature hasn't
- been announced yet and it's not clear if it's a good idea. It's expected
- to work, but undocumented. You should only enable it if you know what
- PCRE is and are sure that you need it for your host patterns. You can
- use tools/url-pattern-translator.pl to convert existing action files to
- use PCRE host patterns. Please don't enable this option when creating
- packages for others that may not be expecting it.],
+AC_ARG_ENABLE(pcre-host-patterns,
+[ --enable-pcre-host-patterns Allow to use PCRE syntax in host patterns by prefixing the pattern with
+ "PCRE-HOST-PATTERN:". You can use tools/url-pattern-translator.pl to
+ convert existing action files to use PCRE host patterns.],
[if test $enableval = yes; then
- AC_DEFINE(FEATURE_EXTENDED_HOST_PATTERNS)
+ AC_DEFINE(FEATURE_PCRE_HOST_PATTERNS)
fi])
AC_ARG_ENABLE(external-filters,
While flexible, this is not the sophistication of full regular expression based syntax.
</para>
+<para>
+ When compiled with FEATURE_PCRE_HOST_PATTERNS, patterns can be prefixed with
+ <quote>PCRE-HOST-PATTERN:</quote>, in which case full regular expression
+ (PCRE) syntax can be used for the host pattern as well.
+</para>
+
</sect3>
<!-- ~ End section ~ -->
</dl>
</div>
<p>While flexible, this is not the sophistication of full regular expression based syntax.</p>
+ <p>When compiled with FEATURE_PCRE_HOST_PATTERNS, patterns can be prefixed with <span class=
+ "QUOTE">"PCRE-HOST-PATTERN:"</span>, in which case full regular expression (PCRE) syntax can be used for the host
+ pattern as well.</p>
</div>
<div class="SECT3">
<h3 class="SECT3"><a name="PATH-PATTERN" id="PATH-PATTERN">8.4.2. The Path Pattern</a></h3>
these. If not, you will get a friendly error message. Internet access is not necessary either.</p>
<ul>
<li>
- <p>Privoxy main page:</p><a name="AEN6301" id="AEN6301"></a>
+ <p>Privoxy main page:</p><a name="AEN6303" id="AEN6303"></a>
<blockquote class="BLOCKQUOTE">
<p><a href="http://config.privoxy.org/" target="_top">http://config.privoxy.org/</a></p>
</blockquote>
"APPLICATION">Privoxy</span>)</p>
</li>
<li>
- <p>View and toggle client tags:</p><a name="AEN6309" id="AEN6309"></a>
+ <p>View and toggle client tags:</p><a name="AEN6311" id="AEN6311"></a>
<blockquote class="BLOCKQUOTE">
<p><a href="http://config.privoxy.org/client-tags" target=
"_top">http://config.privoxy.org/client-tags</a></p>
</li>
<li>
<p>Show information about the current configuration, including viewing and editing of actions
- files:</p><a name="AEN6314" id="AEN6314"></a>
+ files:</p><a name="AEN6316" id="AEN6316"></a>
<blockquote class="BLOCKQUOTE">
<p><a href="http://config.privoxy.org/show-status" target=
"_top">http://config.privoxy.org/show-status</a></p>
</blockquote>
</li>
<li>
- <p>Show the browser's request headers:</p><a name="AEN6319" id="AEN6319"></a>
+ <p>Show the browser's request headers:</p><a name="AEN6321" id="AEN6321"></a>
<blockquote class="BLOCKQUOTE">
<p><a href="http://config.privoxy.org/show-request" target=
"_top">http://config.privoxy.org/show-request</a></p>
</blockquote>
</li>
<li>
- <p>Show which actions apply to a URL and why:</p><a name="AEN6324" id="AEN6324"></a>
+ <p>Show which actions apply to a URL and why:</p><a name="AEN6326" id="AEN6326"></a>
<blockquote class="BLOCKQUOTE">
<p><a href="http://config.privoxy.org/show-url-info" target=
"_top">http://config.privoxy.org/show-url-info</a></p>
<li>
<p>Toggle Privoxy on or off. This feature can be turned off/on in the main <tt class="FILENAME">config</tt>
file. When toggled <span class="QUOTE">"off"</span>, <span class="QUOTE">"Privoxy"</span> continues to run,
- but only as a pass-through proxy, with no actions taking place:</p><a name="AEN6332" id="AEN6332"></a>
+ but only as a pass-through proxy, with no actions taking place:</p><a name="AEN6334" id="AEN6334"></a>
<blockquote class="BLOCKQUOTE">
<p><a href="http://config.privoxy.org/toggle" target="_top">http://config.privoxy.org/toggle</a></p>
</blockquote>
- <p>Short cuts. Turn off, then on:</p><a name="AEN6336" id="AEN6336"></a>
+ <p>Short cuts. Turn off, then on:</p><a name="AEN6338" id="AEN6338"></a>
<blockquote class="BLOCKQUOTE">
<p><a href="http://config.privoxy.org/toggle?set=disable" target=
"_top">http://config.privoxy.org/toggle?set=disable</a></p>
- </blockquote><a name="AEN6339" id="AEN6339"></a>
+ </blockquote><a name="AEN6341" id="AEN6341"></a>
<blockquote class="BLOCKQUOTE">
<p><a href="http://config.privoxy.org/toggle?set=enable" target=
"_top">http://config.privoxy.org/toggle?set=enable</a></p>
}
else if (JB_ERR_OK == get_destination_from_headers(headers, csp->http))
{
-#ifndef FEATURE_EXTENDED_HOST_PATTERNS
/* Split the domain we just got for pattern matching */
init_domain_components(csp->http);
-#endif
return JB_ERR_OK;
}
return JB_ERR_PARSE;
}
-#ifndef FEATURE_EXTENDED_HOST_PATTERNS
/* Split the domain we just got for pattern matching */
init_domain_components(csp->http);
-#endif
#ifdef FEATURE_TOGGLE
if ((csp->flags & CSP_FLAG_TOGGLED_ON) != 0)
char *host_ip_addr_str; /**< String with dotted decimal representation
of host's IP. NULL before connect_to() */
-#ifndef FEATURE_EXTENDED_HOST_PATTERNS
char *dbuffer; /**< Buffer with '\0'-delimited domain name. */
char **dvec; /**< List of pointers to the strings in dbuffer. */
int dcount; /**< How many parts to this domain? (length of dvec) */
-#endif /* ndef FEATURE_EXTENDED_HOST_PATTERNS */
#ifdef FEATURE_HTTPS_INSPECTION
int client_ssl; /**< Flag if we should communicate with client over ssl */
struct url_spec
{
-#ifdef FEATURE_EXTENDED_HOST_PATTERNS
+#ifdef FEATURE_PCRE_HOST_PATTERNS
regex_t *host_regex;/**< Regex for host matching */
-#else
+ enum host_regex_type { VANILLA_HOST_PATTERN, PCRE_HOST_PATTERN } host_regex_type;
+#endif /* defined FEATURE_PCRE_HOST_PATTERNS */
char *dbuffer; /**< Buffer with '\0'-delimited domain name, or NULL to match all hosts. */
char **dvec; /**< List of pointers to the strings in dbuffer. */
int dcount; /**< How many parts to this domain? (length of dvec) */
int unanchored; /**< Bitmap - flags are ANCHOR_LEFT and ANCHOR_RIGHT. */
-#endif /* defined FEATURE_EXTENDED_HOST_PATTERNS */
char *port_list; /**< List of acceptable ports, or NULL to match all ports */
for (p = host; *p; p++)
{
- if (*p != '.')
+ if ((*p != '.') && !privoxy_isdigit(*p))
{
- if (!privoxy_isdigit(*p))
- {
- /* Not a dot or digit so it can't be an IPv4 address. */
- return 0;
- }
+ /* Not a dot or digit so it can't be an IPv4 address. */
+ return 0;
}
}
#
# url-pattern-translator.pl old.action > new.action
#
-# Only convert your files once, or, as RoboCop used to say,
-# there will be... trouble.
-#
# Copyright (c) 2008 Fabian Keil <fk@fabiankeil.de>
#
# Permission to use, copy, modify, and distribute this software for any
# Match single character with a dot.
$hp =~ s@(?<!\))\?@.@g;
+ # Add the prefix
+ $hp = "PCRE-HOST-PATTERN:" . $hp;
+
return $hp;
}
$type_to_skip = "whitespace";
- } elsif (m@^\s*TAG:@) {
+ } elsif (m@^\s*CLIENT-TAG:@i) {
+
+ $type_to_skip = "client tag patttern";
+
+ } elsif (m@^\s*TAG:@i) {
$type_to_skip = "tag patttern";
$type_to_skip = "predefined settings";
+ } elsif (m@^\s*PCRE-HOST-PATTERN:@i) {
+
+ $type_to_skip = "already converted pcre host patttern";
+
}
#p("Skipping " . $type_to_skip . ": " . $_) if defined $type_to_skip;
* Purpose : Declares functions to match URLs against URL
* patterns.
*
- * Copyright : Written by and Copyright (C) 2001-2014
+ * Copyright : Written by and Copyright (C) 2001-2020
* the Privoxy team. https://www.privoxy.org/
*
* Based on the Internet Junkbuster originally written
RIGHT_ANCHORED,
RIGHT_ANCHORED_HOST
};
-static jb_err compile_host_pattern(struct pattern_spec *url, const char *host_pattern);
+static jb_err compile_vanilla_host_pattern(struct pattern_spec *url, const char *host_pattern);
+#ifdef FEATURE_PCRE_HOST_PATTERNS
+static jb_err compile_pcre_host_pattern(struct pattern_spec *url, const char *host_pattern);
+#endif
/*********************************************************************
*
freez(http->path);
freez(http->version);
freez(http->host_ip_addr_str);
-#ifndef FEATURE_EXTENDED_HOST_PATTERNS
freez(http->dbuffer);
freez(http->dvec);
http->dcount = 0;
-#endif
}
-#ifndef FEATURE_EXTENDED_HOST_PATTERNS
/*********************************************************************
*
* Function : init_domain_components
return JB_ERR_OK;
}
-#endif /* ndef FEATURE_EXTENDED_HOST_PATTERNS */
/*********************************************************************
freez(buf);
}
-#ifdef FEATURE_EXTENDED_HOST_PATTERNS
- return JB_ERR_OK;
-#else
/* Split domain name so we can compare it against wildcards */
return init_domain_components(http);
-#endif /* def FEATURE_EXTENDED_HOST_PATTERNS */
}
{
char *p;
+#ifdef FEATURE_PCRE_HOST_PATTERNS
+ const size_t prefix_length = 18;
+ if (strncmpic(buf, "PCRE-HOST-PATTERN:", prefix_length) == 0)
+ {
+ url->pattern.url_spec.host_regex_type = PCRE_HOST_PATTERN;
+ /* Overwrite the "PCRE-HOST-PATTERN:" prefix */
+ memmove(buf, buf+prefix_length, strlen(buf+prefix_length)+1);
+ }
+ else
+ {
+ url->pattern.url_spec.host_regex_type = VANILLA_HOST_PATTERN;
+ }
+#endif
+
p = strchr(buf, '/');
if (NULL != p)
{
if (buf[0] != '\0')
{
- return compile_host_pattern(url, buf);
+#ifdef FEATURE_PCRE_HOST_PATTERNS
+ if (url->pattern.url_spec.host_regex_type == PCRE_HOST_PATTERN)
+ {
+ return compile_pcre_host_pattern(url, buf);
+ }
+ else
+#endif
+ {
+ return compile_vanilla_host_pattern(url, buf);
+ }
}
return JB_ERR_OK;
}
-#ifdef FEATURE_EXTENDED_HOST_PATTERNS
+#ifdef FEATURE_PCRE_HOST_PATTERNS
/*********************************************************************
*
- * Function : compile_host_pattern
+ * Function : compile_pcre_host_pattern
*
- * Description : Parses and compiles a host pattern.
+ * Description : Parses and compiles a PCRE host pattern.
*
* Parameters :
* 1 : url = Target pattern_spec to be filled in.
* JB_ERR_PARSE - Cannot parse regex
*
*********************************************************************/
-static jb_err compile_host_pattern(struct pattern_spec *url, const char *host_pattern)
+static jb_err compile_pcre_host_pattern(struct pattern_spec *url, const char *host_pattern)
{
return compile_pattern(host_pattern, RIGHT_ANCHORED_HOST, url, &url->pattern.url_spec.host_regex);
}
+#endif /* def FEATURE_PCRE_HOST_PATTERNS */
-#else
/*********************************************************************
*
- * Function : compile_host_pattern
+ * Function : compile_vanilla_host_pattern
*
* Description : Parses and "compiles" an old-school host pattern.
*
* JB_ERR_PARSE - Cannot parse regex
*
*********************************************************************/
-static jb_err compile_host_pattern(struct pattern_spec *url, const char *host_pattern)
+static jb_err compile_vanilla_host_pattern(struct pattern_spec *url, const char *host_pattern)
{
char *v[150];
size_t size;
}
}
-#endif /* def FEATURE_EXTENDED_HOST_PATTERNS */
/*********************************************************************
if (pattern == NULL) return;
freez(pattern->spec);
-#ifdef FEATURE_EXTENDED_HOST_PATTERNS
+#ifdef FEATURE_PCRE_HOST_PATTERNS
if (pattern->pattern.url_spec.host_regex)
{
regfree(pattern->pattern.url_spec.host_regex);
freez(pattern->pattern.url_spec.host_regex);
}
-#else
+#endif /* def FEATURE_PCRE_HOST_PATTERNS */
freez(pattern->pattern.url_spec.dbuffer);
freez(pattern->pattern.url_spec.dvec);
pattern->pattern.url_spec.dcount = 0;
-#endif /* ndef FEATURE_EXTENDED_HOST_PATTERNS */
freez(pattern->pattern.url_spec.port_list);
if (pattern->pattern.url_spec.preg)
{
const struct pattern_spec *pattern)
{
assert(http->host != NULL);
-#ifdef FEATURE_EXTENDED_HOST_PATTERNS
- return ((NULL == pattern->pattern.url_spec.host_regex)
- || (0 == regexec(pattern->pattern.url_spec.host_regex, http->host, 0, NULL, 0)));
-#else
- return ((NULL == pattern->pattern.url_spec.dbuffer) || (0 == domain_match(pattern, http)));
+#ifdef FEATURE_PCRE_HOST_PATTERNS
+ if (pattern->pattern.url_spec.host_regex_type == PCRE_HOST_PATTERN) {
+ return ((NULL == pattern->pattern.url_spec.host_regex)
+ || (0 == regexec(pattern->pattern.url_spec.host_regex,
+ http->host, 0, NULL, 0)));
+ }
#endif
+ return ((NULL == pattern->pattern.url_spec.dbuffer) || (0 == domain_match(pattern, http)));
}
#include "project.h"
extern void free_http_request(struct http_request *http);
-#ifndef FEATURE_EXTENDED_HOST_PATTERNS
extern jb_err init_domain_components(struct http_request *http);
-#endif
extern jb_err parse_http_request(const char *req, struct http_request *http);
extern jb_err parse_http_url(const char *url,
struct http_request *http,