Relocate two parentheses I misplaced in the previous commit. No functional change...

[privoxy.git] / urlmatch.c
diff --git a/urlmatch.c b/urlmatch.c

index 3dead33..3a9c865 100644 (file)
--- a/urlmatch.c
+++ b/urlmatch.c
@@ -1,4 +1,4 @@
-const char urlmatch_rcs[] = "$Id: urlmatch.c,v 1.62 2011/09/04 11:10:56 fabiankeil Exp $";
+const char urlmatch_rcs[] = "$Id: urlmatch.c,v 1.69 2012/03/09 16:24:36 fabiankeil Exp $";
  /*********************************************************************
   *
   * File        :  $Source: /cvsroot/ijbswa/current/urlmatch.c,v $
@@ -6,7 +6,7 @@ const char urlmatch_rcs[] = "$Id: urlmatch.c,v 1.62 2011/09/04 11:10:56 fabianke
   * Purpose     :  Declares functions to match URLs against URL
   *                patterns.
   *
- * Copyright   :  Written by and Copyright (C) 2001-2009
+ * Copyright   :  Written by and Copyright (C) 2001-2011
   *                the Privoxy team. http://www.privoxy.org/
   *
   *                Based on the Internet Junkbuster originally written
@@ -135,7 +135,7 @@ jb_err init_domain_components(struct http_request *http)
     /* map to lower case */
     for (p = http->dbuffer; *p ; p++)
     {
-      *p = (char)tolower((int)(unsigned char)*p);
+      *p = (char)privoxy_tolower(*p);
     }
  
     /* split the domain name into components */
@@ -167,6 +167,53 @@ jb_err init_domain_components(struct http_request *http)
  #endif /* ndef FEATURE_EXTENDED_HOST_PATTERNS */
  
  
+/*********************************************************************
+ *
+ * Function    :  url_requires_percent_encoding
+ *
+ * Description :  Checks whether a URL contains characters that are
+ *                invalid according to RFC 3986 and should therefore
+ *                be percent-encoded. Does not verify whether or not
+ *                the passed string actually is a valid URL.
+ *
+ * Parameters  :
+ *          1  :  url = URL to check
+ *
+ * Returns     :  TRUE if percent-encoding is required, FALSE otherwise
+ *
+ *********************************************************************/
+int url_requires_percent_encoding(const char *url)
+{
+   static const char allowed_characters[128] = { /* indexed by byte value; '\0' = not allowed */
+      '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
+      '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
+      '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
+      '\0', '\0', '\0', '!',  '\0', '#',  '$',  '%',  '&',  '\'',
+      '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',  '0',  '1',
+      '2',  '3',  '4',  '5',  '6',  '7',  '8',  '9',  ':',  ';',
+      '\0', '=',  '\0', '?',  '@',  'A',  'B',  'C',  'D',  'E',
+      'F',  'G',  'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
+      'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',  'X',  'Y',
+      'Z',  '[',  '\0', ']',  '\0', '_',  '\0', 'a',  'b',  'c',
+      'd',  'e',  'f',  'g',  'h',  'i',  'j',  'k',  'l',  'm',
+      'n',  'o',  'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
+      'x',  'y',  'z',  '\0', '\0', '\0', '~',  '\0'
+   };
+
+   while (*url != '\0')
+   {
+      const unsigned int i = (unsigned char)*url++; /* cast keeps bytes above 127 non-negative */
+      if (i >= sizeof(allowed_characters) || '\0' == allowed_characters[i]) /* outside table or disallowed */
+      {
+         return TRUE;
+      }
+   }
+
+   return FALSE;
+
+}
+
+
  /*********************************************************************
   *
   * Function    :  parse_http_url
@@ -206,8 +253,8 @@ jb_err parse_http_url(const char *url, struct http_request *http, int require_pr
      */
     if (*http->url == '*')
     {
-      if  ( NULL == (http->path = strdup("*"))
-         || NULL == (http->hostport = strdup("")) )
+      if (NULL == (http->path = strdup("*"))
+       || NULL == (http->hostport = strdup("")))
        {
           return JB_ERR_MEMORY;
        }
@@ -290,8 +337,8 @@ jb_err parse_http_url(const char *url, struct http_request *http, int require_pr
  
        freez(buf);
  
-      if ( (http->path == NULL)
-        || (http->hostport == NULL))
+      if ((http->path == NULL)
+       || (http->hostport == NULL))
        {
           return JB_ERR_MEMORY;
        }
@@ -534,7 +581,7 @@ jb_err parse_http_request(const char *req, struct http_request *http)
  
     if ( (http->cmd == NULL)
       || (http->gpc == NULL)
-     || (http->ver == NULL) )
+     || (http->ver == NULL))
     {
        return JB_ERR_MEMORY;
     }
@@ -728,7 +775,7 @@ static jb_err compile_url_pattern(struct url_spec *url, char *buf)
   *
   * Function    :  compile_host_pattern
   *
- * Description :  Parses and compiles a host pattern..
+ * Description :  Parses and compiles a host pattern.
   *
   * Parameters  :
   *          1  :  url = Target url_spec to be filled in.
@@ -794,7 +841,7 @@ static jb_err compile_host_pattern(struct url_spec *url, const char *host_patter
      */
     for (p = url->dbuffer; *p ; p++)
     {
-      *p = (char)tolower((int)(unsigned char)*p);
+      *p = (char)privoxy_tolower(*p);
     }
  
     /*
@@ -924,9 +971,9 @@ static int simplematch(const char *pattern, const char *text)
        /*
         * Char match, or char range match?
         */
-      if ( (*pat == *txt)
-      ||   (*pat == '?')
-      ||   ((*pat == ']') && (charmap[*txt / 8] & (1 << (*txt % 8)))) )
+      if ((*pat == *txt)
+       || (*pat == '?')
+       || ((*pat == ']') && (charmap[*txt / 8] & (1 << (*txt % 8)))))
        {
           /*
            * Success: Go ahead
@@ -962,7 +1009,7 @@ static int simplematch(const char *pattern, const char *text)
     }
  
     /* Cut off extra '*'s */
-   if(*pat == '*')  pat++;
+   if (*pat == '*') pat++;
  
     /* If this is the pattern's end, fine! */
     return(*pat);
@@ -1327,7 +1374,7 @@ int match_portlist(const char *portlist, int port)
            * or, if max was omitted, between min and 65K
            */
           *max++ = '\0';
-         if(port >= atoi(min) && port <= (atoi(max) ? atoi(max) : 65535))
+         if (port >= atoi(min) && port <= (atoi(max) ? atoi(max) : 65535))
           {
              freez(portlist_copy);
              return(1);