Advertise IPv6 support on the show-status page.

[privoxy.git] / urlmatch.c
diff --git a/urlmatch.c b/urlmatch.c

index 77c6391..b1e72b9 100644 (file)
--- a/urlmatch.c
+++ b/urlmatch.c
@@ -1,4 +1,4 @@
-const char urlmatch_rcs[] = "$Id: urlmatch.c,v 1.39 2008/04/22 16:27:42 fabiankeil Exp $";
+const char urlmatch_rcs[] = "$Id: urlmatch.c,v 1.49 2009/04/17 11:34:35 fabiankeil Exp $";
  /*********************************************************************
   *
   * File        :  $Source: /cvsroot/ijbswa/current/urlmatch.c,v $
@@ -6,8 +6,8 @@ const char urlmatch_rcs[] = "$Id: urlmatch.c,v 1.39 2008/04/22 16:27:42 fabianke
   * Purpose     :  Declares functions to match URLs against URL
   *                patterns.
   *
- * Copyright   :  Written by and Copyright (C) 2001-2003, 2006-2008 the SourceForge
- *                Privoxy team. http://www.privoxy.org/
+ * Copyright   :  Written by and Copyright (C) 2001-2009
+ *                the Privoxy team. http://www.privoxy.org/
   *
   *                Based on the Internet Junkbuster originally written
   *                by and Copyright (C) 1997 Anonymous Coders and
@@ -33,6 +33,40 @@ const char urlmatch_rcs[] = "$Id: urlmatch.c,v 1.39 2008/04/22 16:27:42 fabianke
   *
   * Revisions   :
   *    $Log: urlmatch.c,v $
+ *    Revision 1.49  2009/04/17 11:34:35  fabiankeil
+ *    Style cosmetics for the IPv6 code.
+ *
+ *    Revision 1.48  2009/04/17 11:27:49  fabiankeil
+ *    Petr Pisar's privoxy-3.0.12-ipv6-3.diff.
+ *
+ *    Revision 1.47  2009/03/02 19:18:10  fabiankeil
+ *    Streamline parse_http_request()'s prototype. As
+ *    cparser pointed out it doesn't actually use csp.
+ *
+ *    Revision 1.46  2009/02/11 19:31:32  fabiankeil
+ *    Reject request lines that end with neither HTTP/1.0 nor HTTP/1.1.
+ *
+ *    Revision 1.45  2008/06/21 21:19:18  fabiankeil
+ *    Silence bogus compiler warning.
+ *
+ *    Revision 1.44  2008/05/04 16:18:32  fabiankeil
+ *    Provide parse_http_url() with a third parameter to specify
+ *    whether or not URLs without protocol are acceptable.
+ *
+ *    Revision 1.43  2008/05/04 13:30:55  fabiankeil
+ *    Streamline parse_http_url()'s prototype.
+ *
+ *    Revision 1.42  2008/05/04 13:24:16  fabiankeil
+ *    If the method isn't CONNECT, reject URLs without protocol.
+ *
+ *    Revision 1.41  2008/05/02 09:51:34  fabiankeil
+ *    In parse_http_url(), don't muck around with values
+ *    that are none of its business: require an initialized
+ *    http structure and never unset http->ssl.
+ *
+ *    Revision 1.40  2008/04/23 16:12:28  fabiankeil
+ *    Free with freez().
+ *
   *    Revision 1.39  2008/04/22 16:27:42  fabiankeil
   *    In parse_http_request(), remove a pointless
   *    temporary variable and free the buffer earlier.
@@ -359,10 +393,9 @@ jb_err init_domain_components(struct http_request *http)
   * Parameters  :
   *          1  :  url = URL (or is it URI?) to break down
   *          2  :  http = pointer to the http structure to hold elements.
- *                       Will be zeroed before use.  Note that this
- *                       function sets the http->gpc and http->ver
- *                       members to NULL.
- *          3  :  csp = Current client state (buffers, headers, etc...)
+ *                       Must be initialized with valid values (like NULLs).
+ *          3  :  require_protocol = Whether or not URLs without
+ *                                   protocol are acceptable.
   *
   * Returns     :  JB_ERR_OK on success
   *                JB_ERR_MEMORY on out of memory
@@ -370,18 +403,10 @@ jb_err init_domain_components(struct http_request *http)
   *                             or >100 domains deep.
   *
   *********************************************************************/
-jb_err parse_http_url(const char * url,
-                      struct http_request *http,
-                      const struct client_state *csp)
+jb_err parse_http_url(const char *url, struct http_request *http, int require_protocol)
  {
     int host_available = 1; /* A proxy can dream. */
  
-   /*
-    * Zero out the results structure
-    */
-   memset(http, '\0', sizeof(*http));
-
-
     /*
      * Save our initial URL
      */
@@ -429,10 +454,12 @@ jb_err parse_http_url(const char * url,
        if (strncmpic(url_noproto, "http://",  7) == 0)
        {
           url_noproto += 7;
-         http->ssl = 0;
        }
        else if (strncmpic(url_noproto, "https://", 8) == 0)
        {
+         /*
+          * Should only happen when called from cgi_show_url_info().
+          */
           url_noproto += 8;
           http->ssl = 1;
        }
@@ -443,13 +470,13 @@ jb_err parse_http_url(const char * url,
           * Most likely because the client's request
           * was intercepted and redirected into Privoxy.
           */
-         http->ssl = 0;
           http->host = NULL;
           host_available = 0;
        }
-      else
+      else if (require_protocol)
        {
-         http->ssl = 0;
+         freez(buf);
+         return JB_ERR_PARSE;
        }
  
        url_path = strchr(url_noproto, '/');
@@ -519,8 +546,40 @@ jb_err parse_http_url(const char * url,
           host = buf;
        }
  
+      /* Find the end of the hostname, where an optional port number starts */
+      if (*host == '[')
+      {
+         /* Numeric IPv6 address delimited by brackets */
+         host++;
+         port = strchr(host, ']');
+
+         if (port == NULL)
+         {
+            /* Missing closing bracket */
+            freez(buf);
+            return JB_ERR_PARSE;
+         }
+
+         *port++ = '\0';
+
+         if (*port == '\0')
+         {
+            port = NULL;
+         }
+         else if (*port != ':')
+         {
+            /* Garbage after closing bracket */
+            freez(buf);
+            return JB_ERR_PARSE;
+         }
+      }
+      else
+      {
+         /* Plain non-escaped hostname */
+         port = strchr(host, ':');
+      }
+
        /* check if url contains port */
-      port = strchr(host, ':');
        if (port != NULL)
        {
           /* Contains port */
@@ -616,7 +675,6 @@ static int unknown_method(const char *method)
   * Parameters  :
   *          1  :  req = HTTP request line to break down
   *          2  :  http = pointer to the http structure to hold elements
- *          3  :  csp = Current client state (buffers, headers, etc...)
   *
   * Returns     :  JB_ERR_OK on success
   *                JB_ERR_MEMORY on out of memory
@@ -624,9 +682,7 @@ static int unknown_method(const char *method)
   *                                  or >100 domains deep.
   *
   *********************************************************************/
-jb_err parse_http_request(const char *req,
-                          struct http_request *http,
-                          const struct client_state *csp)
+jb_err parse_http_request(const char *req, struct http_request *http)
  {
     char *buf;
     char *v[10]; /* XXX: Why 10? We should only need three. */
@@ -664,7 +720,17 @@ jb_err parse_http_request(const char *req,
        return JB_ERR_PARSE;
     }
  
-   err = parse_http_url(v[1], http, csp);
+   if (strcmpic(v[2], "HTTP/1.1") && strcmpic(v[2], "HTTP/1.0"))
+   {
+      log_error(LOG_LEVEL_ERROR, "The only supported HTTP "
+         "versions are 1.0 and 1.1. This rules out: %s", v[2]);
+      freez(buf);
+      return JB_ERR_PARSE;
+   }
+
+   http->ssl = !strcmpic(v[0], "CONNECT");
+
+   err = parse_http_url(v[1], http, !http->ssl);
     if (err)
     {
        freez(buf);
@@ -674,7 +740,6 @@ jb_err parse_http_request(const char *req,
     /*
      * Copy the details into the structure
      */
-   http->ssl = !strcmpic(v[0], "CONNECT");
     http->cmd = strdup(req);
     http->gpc = strdup(v[0]);
     http->ver = strdup(v[2]);
@@ -718,7 +783,7 @@ static jb_err compile_pattern(const char *pattern, enum regex_anchoring anchorin
  {
     int errcode;
     char rebuf[BUFFER_SIZE];
-   const char *fmt;
+   const char *fmt = NULL;
  
     assert(pattern);
     assert(strlen(pattern) < sizeof(rebuf) - 2);
@@ -817,7 +882,33 @@ static jb_err compile_url_pattern(struct url_spec *url, char *buf)
        *p = '\0';
     }
  
-   p = strchr(buf, ':');
+   /*
+    * IPv6 numeric hostnames can contain colons, thus we need
+    * to delimit the hostname before the real port separator.
+    * As brackets are already used in the hostname pattern,
+    * we use angle brackets ('<', '>') instead.
+    */
+   if ((buf[0] == '<') && (NULL != (p = strchr(buf + 1, '>'))))
+   {
+      *p++ = '\0';
+      buf++;
+
+      if (*p == '\0')
+      {
+         /* IPv6 address without port number */
+         p = NULL;
+      }
+      else if (*p != ':')
+      {
+         /* Garbage after address delimiter */
+         return JB_ERR_PARSE;
+      }
+   }
+   else
+   {
+      p = strchr(buf, ':');
+   }
+
     if (NULL != p)
     {
        *p++ = '\0';
@@ -1418,6 +1509,59 @@ int match_portlist(const char *portlist, int port)
  }
  
  
+/*********************************************************************
+ *
+ * Function    :  parse_forwarder_address
+ *
+ * Description :  Split a forwarder address into hostname and port.
+ *
+ * Parameters  :
+ *          1  :  address = "host[:port]" or "[IPv6 address][:port]".
+ *          2  :  hostname = Gets a strdup()'d, unbracketed host. NULL on OOM.
+ *          3  :  port = Used to return the port. Untouched if no port
+ *                       is specified or on error.
+ *
+ * Returns     :  JB_ERR_OK on success
+ *                JB_ERR_MEMORY on out of memory
+ *                JB_ERR_PARSE if a leading '[' has no closing ']'.
+ *
+ *********************************************************************/
+jb_err parse_forwarder_address(char *address, char **hostname, int *port)
+{
+   char *p = address;
+
+   if ((*address == '[') && (NULL == strchr(address, ']')))
+   {
+      /* *hostname is left untouched. XXX: Needs more validity checks. */
+      return JB_ERR_PARSE;
+   }
+
+   *hostname = strdup(address);
+   if (NULL == *hostname)
+   {
+      return JB_ERR_MEMORY;
+   }
+
+   if ((**hostname == '[') && (NULL != (p = strchr(*hostname, ']'))))
+   {
+      *p++ = '\0'; /* End the host at ']'; p now points past it. */
+      memmove(*hostname, (*hostname + 1), (size_t)(p - *hostname)); /* Drop '['. */
+      if (*p == ':') /* Anything else after ']' is silently ignored. */
+      {
+         *port = (int)strtol(++p, NULL, 0); /* Base 0, result unvalidated. */
+      }
+   }
+   else if (NULL != (p = strchr(*hostname, ':'))) /* Plain "host:port". */
+   {
+      *p++ = '\0'; /* Cut the host off at the first colon. */
+      *port = (int)strtol(p, NULL, 0); /* Base 0, result unvalidated. */
+   }
+
+   return JB_ERR_OK;
+
+}
+
+
  /*
    Local Variables:
    tab-width: 3