-const char urlmatch_rcs[] = "$Id: urlmatch.c,v 1.18 2007/07/30 16:42:21 fabiankeil Exp $";
+const char urlmatch_rcs[] = "$Id: urlmatch.c,v 1.23 2008/04/05 12:19:20 fabiankeil Exp $";
/*********************************************************************
*
* File : $Source: /cvsroot/ijbswa/current/urlmatch.c,v $
* Purpose : Declares functions to match URLs against URL
* patterns.
*
- * Copyright : Written by and Copyright (C) 2001-2003, 2006-2007 the SourceForge
+ * Copyright : Written by and Copyright (C) 2001-2003, 2006-2008 the SourceForge
* Privoxy team. http://www.privoxy.org/
*
* Based on the Internet Junkbuster originally written
*
* Revisions :
* $Log: urlmatch.c,v $
+ * Revision 1.23 2008/04/05 12:19:20 fabiankeil
+ * Factor compile_host_pattern() out of create_url_spec().
+ *
+ * Revision 1.22 2008/03/30 15:02:32 fabiankeil
+ * SZitify unknown_method().
+ *
+ * Revision 1.21 2007/12/24 16:34:23 fabiankeil
+ * Band-aid (and micro-optimization) that makes it less likely to run out of
+ * stack space with overly-complex path patterns. Probably masks the problem
+ * reported by Lee in #1856679. Hohoho.
+ *
+ * Revision 1.20 2007/09/02 15:31:20 fabiankeil
+ * Move match_portlist() from filter.c to urlmatch.c.
+ * It's used for url matching, not for filtering.
+ *
+ * Revision 1.19 2007/09/02 13:42:11 fabiankeil
+ * - Allow port lists in url patterns.
+ * - Ditch unused url_spec member pathlen.
+ *
* Revision 1.18 2007/07/30 16:42:21 fabiankeil
* Move the method check into unknown_method()
* and loop through the known methods instead
#include "ssplit.h"
#include "miscutil.h"
#include "errlog.h"
-/*
- * XXX: only for match_portlist() which I will relocate soonish.
- */
-#include "filters.h"
const char urlmatch_h_rcs[] = URLMATCH_H_VERSION;
http->dcount = 0;
}
+
/*********************************************************************
*
* Function : init_domain_components
return JB_ERR_OK;
}
+
/*********************************************************************
*
* Function : parse_http_url
*/
"VERSION-CONTROL", "REPORT", "CHECKOUT", "CHECKIN", "UNCHECKOUT",
"MKWORKSPACE", "UPDATE", "LABEL", "MERGE", "BASELINE-CONTROL", "MKACTIVITY",
- NULL
};
int i;
- for (i = 0; NULL != known_http_methods[i]; i++)
+ for (i = 0; i < SZ(known_http_methods); i++)
{
if (0 == strcmpic(method, known_http_methods[i]))
{
}
+#ifdef FEATURE_PCRE_HOST_PATTERNS
+/*********************************************************************
+ *
+ * Function    :  compile_host_pattern
+ *
+ * Description :  Parses and compiles a PCRE host pattern.
+ *                A '$' is appended to the pattern before it is
+ *                passed to regcomp(). On every failure path the
+ *                url_spec is handed to free_url_spec().
+ *
+ * Parameters  :
+ *          1  :  url = Target url_spec to be filled in.
+ *          2  :  host_pattern = Host pattern to compile.
+ *
+ * Returns     :  JB_ERR_OK - Success
+ *                JB_ERR_MEMORY - Out of memory
+ *                JB_ERR_PARSE - Cannot parse regex, or the pattern
+ *                               is too long for the work buffer
+ *
+ *********************************************************************/
+static jb_err compile_host_pattern(struct url_spec *url, const char *host_pattern)
+{
+   int errcode;
+   char rebuf[BUFFER_SIZE];
+
+   assert(host_pattern);
+
+   /*
+    * Check the length at run time instead of only asserting it:
+    * with assertions compiled out, snprintf() below would silently
+    * truncate an over-long pattern (losing the trailing '$') and a
+    * different regex than the one requested would be compiled.
+    */
+   if (strlen(host_pattern) >= sizeof(rebuf) - 2)
+   {
+      log_error(LOG_LEVEL_ERROR,
+         "error compiling %s: host pattern too long", url->spec);
+      free_url_spec(url);
+
+      return JB_ERR_PARSE;
+   }
+
+   url->host_regex = zalloc(sizeof(*url->host_regex));
+   if (NULL == url->host_regex)
+   {
+      free_url_spec(url);
+      return JB_ERR_MEMORY;
+   }
+
+   snprintf(rebuf, sizeof(rebuf), "%s$", host_pattern);
+
+   errcode = regcomp(url->host_regex, rebuf,
+         (REG_EXTENDED|REG_NOSUB|REG_ICASE));
+
+   if (errcode)
+   {
+      /* rebuf is reused to hold the regerror() message. */
+      size_t errlen = regerror(errcode, url->host_regex, rebuf, sizeof(rebuf));
+      if (errlen > (sizeof(rebuf) - (size_t)1))
+      {
+         errlen = sizeof(rebuf) - (size_t)1;
+      }
+      rebuf[errlen] = '\0';
+      log_error(LOG_LEVEL_ERROR, "error compiling %s: %s", url->spec, rebuf);
+      free_url_spec(url);
+
+      return JB_ERR_PARSE;
+   }
+
+   return JB_ERR_OK;
+
+}
+
+#else
+
+/*********************************************************************
+ *
+ * Function    :  compile_host_pattern
+ *
+ * Description :  Parses and "compiles" an old-school host pattern:
+ *                records whether the pattern is unanchored on the
+ *                left and/or right, stores a lower-cased copy in
+ *                url->dbuffer and splits it at the dots into
+ *                url->dvec / url->dcount.
+ *
+ *                On failure the url_spec is handed to
+ *                free_url_spec(), like the PCRE variant does.
+ *
+ * Parameters  :
+ *          1  :  url = Target url_spec to be filled in.
+ *          2  :  host_pattern = Host pattern to parse.
+ *
+ * Returns     :  JB_ERR_OK - Success
+ *                JB_ERR_MEMORY - Out of memory
+ *
+ *********************************************************************/
+static jb_err compile_host_pattern(struct url_spec *url, const char *host_pattern)
+{
+   char *v[150];
+   size_t size;
+   size_t pattern_length;
+   char *p;
+
+   /*
+    * An empty pattern has no last character to look at.
+    * Leave the url_spec untouched in that case.
+    */
+   pattern_length = strlen(host_pattern);
+   if (pattern_length == 0)
+   {
+      return JB_ERR_OK;
+   }
+
+   /*
+    * Parse domain part
+    */
+   if (host_pattern[pattern_length - 1] == '.')
+   {
+      url->unanchored |= ANCHOR_RIGHT;
+   }
+   if (host_pattern[0] == '.')
+   {
+      url->unanchored |= ANCHOR_LEFT;
+   }
+
+   /*
+    * Split domain into components
+    */
+   url->dbuffer = strdup(host_pattern);
+   if (NULL == url->dbuffer)
+   {
+      /*
+       * free_url_spec() only calls regfree() on url->preg if it is
+       * set and also releases url->port_list, which the previous
+       * hand-written cleanup did not.
+       */
+      free_url_spec(url);
+      return JB_ERR_MEMORY;
+   }
+
+   /*
+    * Map to lower case
+    */
+   for (p = url->dbuffer; *p ; p++)
+   {
+      *p = (char)tolower((int)(unsigned char)*p);
+   }
+
+   /*
+    * Split the domain name into components
+    */
+   url->dcount = ssplit(url->dbuffer, ".", v, SZ(v), 1, 1);
+
+   if (url->dcount < 0)
+   {
+      url->dcount = 0;
+      free_url_spec(url);
+      return JB_ERR_MEMORY;
+   }
+   else if (url->dcount != 0)
+   {
+      /*
+       * Save a copy of the pointers in dvec
+       */
+      size = (size_t)url->dcount * sizeof(*url->dvec);
+
+      url->dvec = (char **)malloc(size);
+      if (NULL == url->dvec)
+      {
+         url->dcount = 0;
+         free_url_spec(url);
+         return JB_ERR_MEMORY;
+      }
+
+      memcpy(url->dvec, v, size);
+   }
+   /*
+    * else dcount == 0 in which case we needn't do anything,
+    * since dvec will never be accessed and the pattern will
+    * match all domains.
+    */
+   return JB_ERR_OK;
+}
+
+
/*********************************************************************
*
* Function : simple_domaincmp
}
}
+#endif /* ndef FEATURE_PCRE_HOST_PATTERNS */
/*********************************************************************
if (buf[0] != '\0')
{
- char *v[150];
- size_t size;
-
- /*
- * Parse domain part
- */
- if (buf[strlen(buf) - 1] == '.')
- {
- url->unanchored |= ANCHOR_RIGHT;
- }
- if (buf[0] == '.')
- {
- url->unanchored |= ANCHOR_LEFT;
- }
-
- /*
- * Split domain into components
- */
- url->dbuffer = strdup(buf);
- if (NULL == url->dbuffer)
- {
- freez(url->spec);
- freez(url->path);
- regfree(url->preg);
- freez(url->preg);
- return JB_ERR_MEMORY;
- }
-
- /*
- * Map to lower case
- */
- for (p = url->dbuffer; *p ; p++)
- {
- *p = (char)tolower((int)(unsigned char)*p);
- }
-
- /*
- * Split the domain name into components
- */
- url->dcount = ssplit(url->dbuffer, ".", v, SZ(v), 1, 1);
-
- if (url->dcount < 0)
- {
- freez(url->spec);
- freez(url->path);
- regfree(url->preg);
- freez(url->preg);
- freez(url->dbuffer);
- url->dcount = 0;
- return JB_ERR_MEMORY;
- }
- else if (url->dcount != 0)
- {
-
- /*
- * Save a copy of the pointers in dvec
- */
- size = (size_t)url->dcount * sizeof(*url->dvec);
-
- url->dvec = (char **)malloc(size);
- if (NULL == url->dvec)
- {
- freez(url->spec);
- freez(url->path);
- regfree(url->preg);
- freez(url->preg);
- freez(url->dbuffer);
- url->dcount = 0;
- return JB_ERR_MEMORY;
- }
-
- memcpy(url->dvec, v, size);
- }
- /*
- * else dcount == 0 in which case we needn't do anything,
- * since dvec will never be accessed and the pattern will
- * match all domains.
- */
+ return compile_host_pattern(url, buf);
}
return JB_ERR_OK;
if (url == NULL) return;
freez(url->spec);
+#ifdef FEATURE_PCRE_HOST_PATTERNS
+ if (url->host_regex)
+ {
+ regfree(url->host_regex);
+ freez(url->host_regex);
+ }
+#else
freez(url->dbuffer);
freez(url->dvec);
+#endif /* ndef FEATURE_PCRE_HOST_PATTERNS */
freez(url->path);
freez(url->port_list);
if (url->preg)
int url_match(const struct url_spec *pattern,
const struct http_request *url)
{
- int port_matches;
- int domain_matches;
- int path_matches;
+ /* XXX: these should probably be functions. As macros they expand into the
+  * && chain of the return statement below, so a check is only evaluated
+  * if the checks before it matched. */
+#define PORT_MATCHES ((NULL == pattern->port_list) || match_portlist(pattern->port_list, url->port))
+#ifdef FEATURE_PCRE_HOST_PATTERNS
+#define DOMAIN_MATCHES ((NULL == pattern->host_regex) || (0 == regexec(pattern->host_regex, url->host, 0, NULL, 0)))
+#else
+#define DOMAIN_MATCHES ((NULL == pattern->dbuffer) || (0 == domain_match(pattern, url)))
+#endif
+#define PATH_MATCHES ((NULL == pattern->path) || (0 == regexec(pattern->preg, url->path, 0, NULL, 0)))
if (pattern->tag_regex != NULL)
{
return 0;
}
- port_matches = (NULL == pattern->port_list) || match_portlist(pattern->port_list, url->port);
- domain_matches = (NULL == pattern->dbuffer) || (0 == domain_match(pattern, url));
- path_matches = (NULL == pattern->path) || (0 == regexec(pattern->preg, url->path, 0, NULL, 0));
+ return (PORT_MATCHES && DOMAIN_MATCHES && PATH_MATCHES);
+
+}
- return (port_matches && domain_matches && path_matches);
+
+/*********************************************************************
+ *
+ * Function    :  match_portlist
+ *
+ * Description :  Check if a given number is covered by a comma
+ *                separated list of numbers and ranges (a,b-c,d,..)
+ *
+ *                The list is parsed on a private copy, the caller's
+ *                string is not modified. Numbers are converted with
+ *                atoi(). A range whose upper bound is missing or
+ *                evaluates to 0 is treated as open up to 65535.
+ *
+ * Parameters  :
+ *          1  :  portlist = String with list
+ *          2  :  port = port to check
+ *
+ * Returns     :  0 => no match (also if the working copy of the
+ *                     list could not be allocated)
+ *                1 => match
+ *
+ *********************************************************************/
+int match_portlist(const char *portlist, int port)
+{
+   char *min, *max, *next, *portlist_copy;
+   int range_end;
+
+   portlist_copy = strdup(portlist);
+   if (NULL == portlist_copy)
+   {
+      /*
+       * Without the copy there is nothing we can safely tokenize;
+       * passing NULL on to strchr() would crash.
+       */
+      log_error(LOG_LEVEL_ERROR,
+         "Out of memory while matching port list %s", portlist);
+      return 0;
+   }
+   min = portlist_copy;
+
+   /*
+    * Zero-terminate first item and remember offset for next
+    */
+   if (NULL != (next = strchr(portlist_copy, (int) ',')))
+   {
+      *next++ = '\0';
+   }
+
+   /*
+    * Loop through all items, checking for match
+    */
+   while (min)
+   {
+      if (NULL == (max = strchr(min, (int) '-')))
+      {
+         /*
+          * No dash, check for equality
+          */
+         if (port == atoi(min))
+         {
+            free(portlist_copy);
+            return(1);
+         }
+      }
+      else
+      {
+         /*
+          * This is a range, so check if between min and max,
+          * or, if max was omitted, between min and 65K
+          */
+         *max++ = '\0';
+         range_end = atoi(max);
+         if (0 == range_end)
+         {
+            range_end = 65535;
+         }
+         if (port >= atoi(min) && port <= range_end)
+         {
+            free(portlist_copy);
+            return(1);
+         }
+
+      }
+
+      /*
+       * Jump to next item
+       */
+      min = next;
+
+      /*
+       * Zero-terminate next item and remember offset for n+1
+       */
+      if ((NULL != next) && (NULL != (next = strchr(next, (int) ','))))
+      {
+         *next++ = '\0';
+      }
+   }
+
+   free(portlist_copy);
+   return 0;
}