-const char urlmatch_rcs[] = "$Id: urlmatch.c,v 1.22 2008/03/30 15:02:32 fabiankeil Exp $";
+const char urlmatch_rcs[] = "$Id: urlmatch.c,v 1.27 2008/04/08 15:44:33 fabiankeil Exp $";
/*********************************************************************
*
* File : $Source: /cvsroot/ijbswa/current/urlmatch.c,v $
* Purpose : Declares functions to match URLs against URL
* patterns.
*
- * Copyright : Written by and Copyright (C) 2001-2003, 2006-2007 the SourceForge
+ * Copyright : Written by and Copyright (C) 2001-2003, 2006-2008 the SourceForge
* Privoxy team. http://www.privoxy.org/
*
* Based on the Internet Junkbuster originally written
*
* Revisions :
* $Log: urlmatch.c,v $
+ * Revision 1.27 2008/04/08 15:44:33 fabiankeil
+ * Save a bit of memory (and a few cpu cycles) by not bothering to
+ * compile slash-only path regexes that don't affect the result.
+ *
+ * Revision 1.26 2008/04/07 16:57:18 fabiankeil
+ * - Use free_url_spec() more consistently.
+ * - Let it reset url->dcount just in case.
+ *
+ * Revision 1.25 2008/04/06 15:18:38 fabiankeil
+ * Oh well, rename the --enable-pcre-host-patterns option to
+ * --enable-extended-host-patterns as it's not really PCRE syntax.
+ *
+ * Revision 1.24 2008/04/06 14:54:26 fabiankeil
+ * Use PCRE syntax in host patterns when configured
+ * with --enable-pcre-host-patterns.
+ *
+ * Revision 1.23 2008/04/05 12:19:20 fabiankeil
+ * Factor compile_host_pattern() out of create_url_spec().
+ *
* Revision 1.22 2008/03/30 15:02:32 fabiankeil
* SZitify unknown_method().
*
}
+#ifdef FEATURE_EXTENDED_HOST_PATTERNS
+/*********************************************************************
+ *
+ * Function : compile_host_pattern
+ *
+ * Description : Compiles a host pattern as a case-insensitive POSIX
+ * extended regular expression (regcomp() with
+ * REG_EXTENDED|REG_NOSUB|REG_ICASE) and stores the
+ * result in url->host_regex. A '$' is appended to the
+ * pattern before compiling, so a match has to end at
+ * the end of the string it is applied to. On every
+ * failure path free_url_spec() is called on url before
+ * returning.
+ *
+ * Parameters :
+ * 1 : url = Target url_spec to be filled in.
+ * 2 : host_pattern = Host pattern to compile. Must not be NULL and
+ * its length must be below BUFFER_SIZE - 2; both
+ * conditions are only checked with assert().
+ *
+ * Returns : JB_ERR_OK - Success
+ * JB_ERR_MEMORY - Out of memory
+ * JB_ERR_PARSE - Cannot parse regex
+ *
+ *********************************************************************/
+static jb_err compile_host_pattern(struct url_spec *url, const char *host_pattern)
+{
+ int errcode;
+ char rebuf[BUFFER_SIZE]; /* Holds the anchored pattern first, later reused for the regerror() text. */
+
+ assert(host_pattern);
+ assert(strlen(host_pattern) < sizeof(rebuf) - 2); /* Room for '$' and the NUL. NOTE(review): vanishes with NDEBUG; snprintf() below would then silently truncate. */
+
+ url->host_regex = zalloc(sizeof(*url->host_regex));
+ if (NULL == url->host_regex)
+ {
+ free_url_spec(url);
+ return JB_ERR_MEMORY;
+ }
+
+ snprintf(rebuf, sizeof(rebuf), "%s$", host_pattern); /* Anchor the pattern at the end of the string. */
+
+ errcode = regcomp(url->host_regex, rebuf,
+ (REG_EXTENDED|REG_NOSUB|REG_ICASE));
+
+ if (errcode)
+ {
+ size_t errlen = regerror(errcode, url->host_regex, rebuf, sizeof(rebuf)); /* Overwrites the pattern with the error text. */
+ if (errlen > (sizeof(rebuf) - (size_t)1)) /* regerror() reports the size the full message needs, which may exceed the buffer. */
+ {
+ errlen = sizeof(rebuf) - (size_t)1;
+ }
+ rebuf[errlen] = '\0';
+ log_error(LOG_LEVEL_ERROR, "error compiling %s: %s", url->spec, rebuf); /* Log before freeing: url->spec is still needed here. */
+ free_url_spec(url);
+
+ return JB_ERR_PARSE;
+ }
+
+ return JB_ERR_OK;
+
+}
+
+#else
+
/*********************************************************************
*
* Function : compile_host_pattern
url->dbuffer = strdup(host_pattern);
if (NULL == url->dbuffer)
{
- freez(url->spec);
- freez(url->path);
- regfree(url->preg);
- freez(url->preg);
+ free_url_spec(url);
return JB_ERR_MEMORY;
}
if (url->dcount < 0)
{
- freez(url->spec);
- freez(url->path);
- regfree(url->preg);
- freez(url->preg);
- freez(url->dbuffer);
- url->dcount = 0;
+ free_url_spec(url);
return JB_ERR_MEMORY;
}
else if (url->dcount != 0)
url->dvec = (char **)malloc(size);
if (NULL == url->dvec)
{
- freez(url->spec);
- freez(url->path);
- regfree(url->preg);
- freez(url->preg);
- freez(url->dbuffer);
- url->dcount = 0;
+ free_url_spec(url);
return JB_ERR_MEMORY;
}
}
}
+#endif /* def FEATURE_EXTENDED_HOST_PATTERNS */
/*********************************************************************
errlen = sizeof(rebuf) - 1;
}
rebuf[errlen] = '\0';
-
log_error(LOG_LEVEL_ERROR, "error compiling %s: %s", url->spec, rebuf);
-
- freez(url->spec);
- regfree(url->tag_regex);
- freez(url->tag_regex);
+ free_url_spec(url);
return JB_ERR_PARSE;
}
p = strchr(buf, '/');
if (NULL != p)
{
- url->path = strdup(p);
- if (NULL == url->path)
+ if (*(p+1) != '\0')
{
- freez(url->spec);
- return JB_ERR_MEMORY;
+ url->path = strdup(p);
+ if (NULL == url->path)
+ {
+ free_url_spec(url);
+ return JB_ERR_MEMORY;
+ }
+ }
+ else
+ {
+ /*
+ * The path pattern is a single slash and can
+ * be ignored as it won't affect the result.
+ */
+ assert(NULL == url->path);
+ url->path = NULL;
}
*p = '\0';
}
{
if (NULL == (url->preg = zalloc(sizeof(*url->preg))))
{
- freez(url->spec);
- freez(url->path);
+ free_url_spec(url);
return JB_ERR_MEMORY;
}
errlen = sizeof(rebuf) - (size_t)1;
}
rebuf[errlen] = '\0';
-
log_error(LOG_LEVEL_ERROR, "error compiling %s: %s",
url->spec, rebuf);
-
- freez(url->spec);
- freez(url->path);
- regfree(url->preg);
- freez(url->preg);
+ free_url_spec(url);
return JB_ERR_PARSE;
}
if (url == NULL) return;
freez(url->spec);
+#ifdef FEATURE_EXTENDED_HOST_PATTERNS
+ if (url->host_regex)
+ {
+ regfree(url->host_regex);
+ freez(url->host_regex);
+ }
+#else
freez(url->dbuffer);
freez(url->dvec);
+ url->dcount = 0;
+#endif /* ndef FEATURE_EXTENDED_HOST_PATTERNS */
freez(url->path);
freez(url->port_list);
if (url->preg)
*
*********************************************************************/
int url_match(const struct url_spec *pattern,
- const struct http_request *url)
+ const struct http_request *http)
{
/* XXX: these should probably be functions. */
-#define PORT_MATCHES ((NULL == pattern->port_list) || match_portlist(pattern->port_list, url->port))
-#define DOMAIN_MATCHES ((NULL == pattern->dbuffer) || (0 == domain_match(pattern, url)))
-#define PATH_MATCHES ((NULL == pattern->path) || (0 == regexec(pattern->preg, url->path, 0, NULL, 0)))
+#define PORT_MATCHES ((NULL == pattern->port_list) || match_portlist(pattern->port_list, http->port))
+#ifdef FEATURE_EXTENDED_HOST_PATTERNS
+#define DOMAIN_MATCHES ((NULL == pattern->host_regex) || (0 == regexec(pattern->host_regex, http->host, 0, NULL, 0)))
+#else
+#define DOMAIN_MATCHES ((NULL == pattern->dbuffer) || (0 == domain_match(pattern, http)))
+#endif
+#define PATH_MATCHES ((NULL == pattern->path) || (0 == regexec(pattern->preg, http->path, 0, NULL, 0)))
if (pattern->tag_regex != NULL)
{