From 2935a01f7fffce53f81e235557f3ae5459e200e6 Mon Sep 17 00:00:00 2001 From: oes Date: Thu, 31 May 2001 17:35:20 +0000 Subject: [PATCH] - Enhanced domain part globbing with infix and prefix asterisk matching and optional unanchored operation --- filters.c | 51 +++++++++++++++++-------------- loaders.c | 7 ++++- miscutil.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++- miscutil.h | 10 ++++++- project.h | 7 +++-- 5 files changed, 136 insertions(+), 27 deletions(-) diff --git a/filters.c b/filters.c index e5497c2e..2de5e29d 100644 --- a/filters.c +++ b/filters.c @@ -1,4 +1,4 @@ -const char filters_rcs[] = "$Id: filters.c,v 1.10 2001/05/29 09:50:24 jongfoster Exp $"; +const char filters_rcs[] = "$Id: filters.c,v 1.11 2001/05/29 11:53:23 oes Exp $"; /********************************************************************* * * File : $Source: /cvsroot/ijbswa/current/filters.c,v $ @@ -38,6 +38,9 @@ const char filters_rcs[] = "$Id: filters.c,v 1.10 2001/05/29 09:50:24 jongfoster * * Revisions : * $Log: filters.c,v $ + * Revision 1.11 2001/05/29 11:53:23 oes + * "See why" link added to "blocked" page + * * Revision 1.10 2001/05/29 09:50:24 jongfoster * Unified blocklist/imagelist/permissionslist. * File format is still under discussion, but the internal changes @@ -1045,13 +1048,7 @@ struct url_spec dsplit(char *domain) memset(ret, '\0', sizeof(*ret)); - if ((p = strrchr(domain, '.'))) - { - if (*(++p) == '\0') - { - ret->toplevel = 1; - } - } + ret->unanchored = (domain[strlen(domain) - 1] == '.'); ret->dbuf = strdup(domain); @@ -1075,6 +1072,7 @@ struct url_spec dsplit(char *domain) memcpy(ret->dvec, v, size); } + return(*ret); } @@ -1085,10 +1083,17 @@ struct url_spec dsplit(char *domain) * Function : domaincmp * * Description : Compare domain names. 
- * domaincmp("a.b.c" , "a.b.c") => 0 (MATCH) + * domaincmp("a.b.c", "a.b.c") => 0 (MATCH) * domaincmp("a*.b.c", "a.b.c") => 0 (MATCH) + * domaincmp("a*.b.c", "abc.b.c") => 0 (MATCH) + * domaincmp("a*c.b.c","abbc.b.c") => 0 (MATCH) + * domaincmp("*a.b.c", "dabc.b.c") => 0 (MATCH) * domaincmp("b.c" , "a.b.c") => 0 (MATCH) + * domaincmp("a.b" , "a.b.c") => 1 (DIFF) + * domaincmp("a.b." , "a.b.c") => 0 (MATCH) * domaincmp("" , "a.b.c") => 0 (MATCH) + * + * FIXME: I need a definition! * * Parameters : * 1 : pattern = a domain that may contain a '*' as a wildcard. @@ -1104,27 +1109,29 @@ int domaincmp(struct url_spec *pattern, struct url_spec *fqdn) char *p, *f; /* chars */ pv = pattern->dvec; - pn = pattern->dcnt; - fv = fqdn->dvec; - fn = fqdn->dcnt; + fn = pn = 0; - while ((pn > 0) && (fn > 0)) + while (fn < fqdn->dcnt && pn < pattern->dcnt) { - p = pv[--pn]; - f = fv[--fn]; + p = pv[pn]; + f = fv[fn]; - while (*p && *f && (*p == tolower(*f))) + if (trivimatch(p, f)) { - p++, f++; + if(pn) + { + return 1; + } } - - if ((*p != tolower(*f)) && (*p != '*')) return(1); + else + { + pn++; + } + fn++; } - if (pn > 0) return(1); - - return(0); + return ((pn < pattern->dcnt) || ((fn < fqdn->dcnt) && !pattern->unanchored)); } diff --git a/loaders.c b/loaders.c index 2eb30075..a00b36d4 100644 --- a/loaders.c +++ b/loaders.c @@ -1,4 +1,4 @@ -const char loaders_rcs[] = "$Id: loaders.c,v 1.10 2001/05/29 09:50:24 jongfoster Exp $"; +const char loaders_rcs[] = "$Id: loaders.c,v 1.11 2001/05/29 23:25:24 oes Exp $"; /********************************************************************* * * File : $Source: /cvsroot/ijbswa/current/loaders.c,v $ @@ -35,6 +35,10 @@ const char loaders_rcs[] = "$Id: loaders.c,v 1.10 2001/05/29 09:50:24 jongfoster * * Revisions : * $Log: loaders.c,v $ + * Revision 1.11 2001/05/29 23:25:24 oes + * + * - load_config_line() and load_permissions_file() now use chomp() + * * Revision 1.10 2001/05/29 09:50:24 jongfoster * Unified 
/*********************************************************************
 *
 * Function    :  trivimatch
 *
 * Description :  Trivial string matching, with only one metacharacter,
 *                namely '*', which stands for zero or more arbitrary
 *                characters.  Every other pattern character must equal
 *                the corresponding text character exactly (the
 *                comparison is case sensitive), and the pattern has to
 *                cover the whole text.
 *
 *                When a comparison fails after a '*' has been seen,
 *                matching resumes right behind the most recent '*',
 *                with that '*' swallowing one more character of text.
 *                The text position is rewound as well, so no candidate
 *                alignment is skipped: "a*bc" matches "abbc" and
 *                "*a" matches "aba".
 *
 * Parameters  :
 *          1  :  pattern = pattern for matching; must be a valid,
 *                          NUL-terminated string (it is not modified)
 *          2  :  text    = text to be matched; must be a valid,
 *                          NUL-terminated string (it is not modified)
 *
 * Returns     :  0 if match, else nonzero
 *
 *********************************************************************/
int trivimatch(char *pattern, char *text)
{
   const char *pat = pattern;
   const char *txt = text;

   /*
    * Backtracking state.  Only meaningful once wildcard is set:
    * retry_pat is the pattern position right behind the most recent '*',
    * retry_txt is the first text character that '*' has not swallowed yet.
    */
   const char *retry_pat = pattern;
   const char *retry_txt = text;
   int wildcard = 0;

   while (*txt != '\0')
   {
      if (*pat == '*')
      {
         pat++;

         /* The pattern ends with this '*'? It matches all remaining text. */
         if (*pat == '\0')
         {
            return 0;
         }

         /* Remember where to resume if the rest fails to match here. */
         wildcard  = 1;
         retry_pat = pat;
         retry_txt = txt;
      }
      else if (*pat == *txt)
      {
         /*
          * Literal match.  *pat cannot be '\0' here, since *txt is
          * known to be non-NUL.
          */
         pat++;
         txt++;
      }
      else if (wildcard)
      {
         /*
          * Mismatch, or pattern exhausted with text left over: let the
          * most recent '*' swallow one more character and try again
          * from there.  Rewinding to the most recent '*' only is
          * sufficient, because everything in front of it has already
          * been matched at the earliest possible text position.
          */
         retry_txt++;
         pat = retry_pat;
         txt = retry_txt;
      }
      else
      {
         /* Without wildcard mode, a mismatch is fatal. */
         return 1;
      }
   }

   /* Text is used up; any '*' left in the pattern may match nothing. */
   while (*pat == '*')
   {
      pat++;
   }

   /* Match only if the pattern is used up as well. */
   return (*pat != '\0');

}
* File format is still under discussion, but the internal changes @@ -81,7 +87,9 @@ extern int strcmpic(const char *s1, const char *s2); extern int strncmpic(const char *s1, const char *s2, size_t n); extern char *strsav(char *old, const char *text_to_append); + extern char *chomp(char *string); +extern int trivimatch(char *pattern, char *text); #ifdef __MINGW32__ extern char *strdup(const char *s); diff --git a/project.h b/project.h index 281205be..0cb3aba5 100644 --- a/project.h +++ b/project.h @@ -1,6 +1,6 @@ #ifndef _PROJECT_H #define _PROJECT_H -#define PROJECT_H_VERSION "$Id: project.h,v 1.7 2001/05/29 09:50:24 jongfoster Exp $" +#define PROJECT_H_VERSION "$Id: project.h,v 1.8 2001/05/29 20:09:15 joergs Exp $" /********************************************************************* * * File : $Source: /cvsroot/ijbswa/current/project.h,v $ @@ -36,6 +36,9 @@ * * Revisions : * $Log: project.h,v $ + * Revision 1.8 2001/05/29 20:09:15 joergs + * HTTP_REDIRECT_TEMPLATE fixed. + * * Revision 1.7 2001/05/29 09:50:24 jongfoster * Unified blocklist/imagelist/permissionslist. * File format is still under discussion, but the internal changes @@ -405,7 +408,7 @@ struct url_spec char *dbuf; char **dvec; int dcnt; - int toplevel; + int unanchored; char *path; int pathlen; -- 2.39.2