-const char urlmatch_rcs[] = "$Id: urlmatch.c,v 1.39 2008/04/22 16:27:42 fabiankeil Exp $";
+const char urlmatch_rcs[] = "$Id: urlmatch.c,v 1.49 2009/04/17 11:34:35 fabiankeil Exp $";
/*********************************************************************
*
* File : $Source: /cvsroot/ijbswa/current/urlmatch.c,v $
* Purpose : Declares functions to match URLs against URL
* patterns.
*
- * Copyright : Written by and Copyright (C) 2001-2003, 2006-2008 the SourceForge
- * Privoxy team. http://www.privoxy.org/
+ * Copyright : Written by and Copyright (C) 2001-2009
+ * the Privoxy team. http://www.privoxy.org/
*
* Based on the Internet Junkbuster originally written
* by and Copyright (C) 1997 Anonymous Coders and
*
* Revisions :
* $Log: urlmatch.c,v $
+ * Revision 1.49 2009/04/17 11:34:35 fabiankeil
+ * Style cosmetics for the IPv6 code.
+ *
+ * Revision 1.48 2009/04/17 11:27:49 fabiankeil
+ * Petr Pisar's privoxy-3.0.12-ipv6-3.diff.
+ *
+ * Revision 1.47 2009/03/02 19:18:10 fabiankeil
+ * Streamline parse_http_request()'s prototype. As
+ * cparser pointed out it doesn't actually use csp.
+ *
+ * Revision 1.46 2009/02/11 19:31:32 fabiankeil
+ * Reject request lines that end with neither HTTP/1.0 nor HTTP/1.1.
+ *
+ * Revision 1.45 2008/06/21 21:19:18 fabiankeil
+ * Silence bogus compiler warning.
+ *
+ * Revision 1.44 2008/05/04 16:18:32 fabiankeil
+ * Provide parse_http_url() with a third parameter to specify
+ * whether or not URLs without protocol are acceptable.
+ *
+ * Revision 1.43 2008/05/04 13:30:55 fabiankeil
+ * Streamline parse_http_url()'s prototype.
+ *
+ * Revision 1.42 2008/05/04 13:24:16 fabiankeil
+ * If the method isn't CONNECT, reject URLs without protocol.
+ *
+ * Revision 1.41 2008/05/02 09:51:34 fabiankeil
+ * In parse_http_url(), don't muck around with values
+ * that are none of its business: require an initialized
+ * http structure and never unset http->ssl.
+ *
+ * Revision 1.40 2008/04/23 16:12:28 fabiankeil
+ * Free with freez().
+ *
* Revision 1.39 2008/04/22 16:27:42 fabiankeil
* In parse_http_request(), remove a pointless
* temporary variable and free the buffer earlier.
* Parameters :
* 1 : url = URL (or is it URI?) to break down
* 2 : http = pointer to the http structure to hold elements.
- * Will be zeroed before use. Note that this
- * function sets the http->gpc and http->ver
- * members to NULL.
- * 3 : csp = Current client state (buffers, headers, etc...)
+ * Must be initialized with valid values (like NULLs).
+ * 3 : require_protocol = Whether or not URLs without
+ * protocol are acceptable.
*
* Returns : JB_ERR_OK on success
* JB_ERR_MEMORY on out of memory
* or >100 domains deep.
*
*********************************************************************/
-jb_err parse_http_url(const char * url,
- struct http_request *http,
- const struct client_state *csp)
+jb_err parse_http_url(const char *url, struct http_request *http, int require_protocol)
{
int host_available = 1; /* A proxy can dream. */
- /*
- * Zero out the results structure
- */
- memset(http, '\0', sizeof(*http));
-
-
/*
* Save our initial URL
*/
if (strncmpic(url_noproto, "http://", 7) == 0)
{
url_noproto += 7;
- http->ssl = 0;
}
else if (strncmpic(url_noproto, "https://", 8) == 0)
{
+ /*
+ * Should only happen when called from cgi_show_url_info().
+ */
url_noproto += 8;
http->ssl = 1;
}
* Most likely because the client's request
* was intercepted and redirected into Privoxy.
*/
- http->ssl = 0;
http->host = NULL;
host_available = 0;
}
- else
+ else if (require_protocol)
{
- http->ssl = 0;
+ freez(buf);
+ return JB_ERR_PARSE;
}
url_path = strchr(url_noproto, '/');
host = buf;
}
+ /* Move after hostname before port number */
+ if (*host == '[')
+ {
+ /* Numeric IPv6 address delimited by brackets */
+ host++;
+ port = strchr(host, ']');
+
+ if (port == NULL)
+ {
+ /* Missing closing bracket */
+ freez(buf);
+ return JB_ERR_PARSE;
+ }
+
+ *port++ = '\0';
+
+ if (*port == '\0')
+ {
+ port = NULL;
+ }
+ else if (*port != ':')
+ {
+ /* Garbage after closing bracket */
+ freez(buf);
+ return JB_ERR_PARSE;
+ }
+ }
+ else
+ {
+ /* Plain non-escaped hostname */
+ port = strchr(host, ':');
+ }
+
/* check if url contains port */
- port = strchr(host, ':');
if (port != NULL)
{
/* Contains port */
* Parameters :
* 1 : req = HTTP request line to break down
* 2 : http = pointer to the http structure to hold elements
- * 3 : csp = Current client state (buffers, headers, etc...)
*
* Returns : JB_ERR_OK on success
* JB_ERR_MEMORY on out of memory
* or >100 domains deep.
*
*********************************************************************/
-jb_err parse_http_request(const char *req,
- struct http_request *http,
- const struct client_state *csp)
+jb_err parse_http_request(const char *req, struct http_request *http)
{
char *buf;
char *v[10]; /* XXX: Why 10? We should only need three. */
return JB_ERR_PARSE;
}
- err = parse_http_url(v[1], http, csp);
+ if (strcmpic(v[2], "HTTP/1.1") && strcmpic(v[2], "HTTP/1.0"))
+ {
+ log_error(LOG_LEVEL_ERROR, "The only supported HTTP "
+ "versions are 1.0 and 1.1. This rules out: %s", v[2]);
+ freez(buf);
+ return JB_ERR_PARSE;
+ }
+
+ http->ssl = !strcmpic(v[0], "CONNECT");
+
+ err = parse_http_url(v[1], http, !http->ssl);
if (err)
{
freez(buf);
/*
* Copy the details into the structure
*/
- http->ssl = !strcmpic(v[0], "CONNECT");
http->cmd = strdup(req);
http->gpc = strdup(v[0]);
http->ver = strdup(v[2]);
{
int errcode;
char rebuf[BUFFER_SIZE];
- const char *fmt;
+ const char *fmt = NULL;
assert(pattern);
assert(strlen(pattern) < sizeof(rebuf) - 2);
*p = '\0';
}
- p = strchr(buf, ':');
+ /*
+ * IPv6 numeric hostnames can contain colons, thus we need
+ * to delimit the hostname before the real port separator.
+ * As brackets are already used in the hostname pattern,
+ * we use angle brackets ('<', '>') instead.
+ */
+ if ((buf[0] == '<') && (NULL != (p = strchr(buf + 1, '>'))))
+ {
+ *p++ = '\0';
+ buf++;
+
+ if (*p == '\0')
+ {
+ /* IPv6 address without port number */
+ p = NULL;
+ }
+ else if (*p != ':')
+ {
+ /* Garbage after address delimiter */
+ return JB_ERR_PARSE;
+ }
+ }
+ else
+ {
+ p = strchr(buf, ':');
+ }
+
if (NULL != p)
{
*p++ = '\0';
}
+/*********************************************************************
+ *
+ * Function    :  parse_forwarder_address
+ *
+ * Description :  Parse out the host and port from a forwarder address.
+ *                Accepted forms are "host", "host:port", "[ipv6]" and
+ *                "[ipv6]:port". The brackets around a numeric IPv6
+ *                address are stripped from the returned hostname.
+ *
+ *                Characters following the closing bracket that do not
+ *                start with ':' are silently ignored, and the port
+ *                string is converted without any validity or range
+ *                check.
+ *
+ * Parameters  :
+ *          1  :  address = The forwarder address to parse. It is only
+ *                          read, all modifications happen on a copy.
+ *          2  :  hostname = Used to return the hostname as a newly
+ *                           allocated string the caller has to free.
+ *                           Set to NULL on error.
+ *          3  :  port = Used to return the port. Untouched if no port
+ *                       is specified. The number is converted with
+ *                       strtol() base 0, so a "0" or "0x" prefix
+ *                       selects octal or hexadecimal notation.
+ *
+ * Returns     :  JB_ERR_OK on success
+ *                JB_ERR_MEMORY on out of memory
+ *                JB_ERR_PARSE on malformed address.
+ *
+ *********************************************************************/
+jb_err parse_forwarder_address(char *address, char **hostname, int *port)
+{
+   char *p = address;
+
+   /*
+    * Make sure the caller never sees a stale pointer if we
+    * bail out before the address has been duplicated.
+    */
+   *hostname = NULL;
+
+   if ((*address == '[') && (NULL == strchr(address, ']')))
+   {
+      /* XXX: Should do some more validity checks here. */
+      return JB_ERR_PARSE;
+   }
+
+   *hostname = strdup(address);
+   if (NULL == *hostname)
+   {
+      return JB_ERR_MEMORY;
+   }
+
+   if ((**hostname == '[') && (NULL != (p = strchr(*hostname, ']'))))
+   {
+      /* Terminate the address at the closing bracket ... */
+      *p++ = '\0';
+      /*
+       * ... and shift it one byte to the left to get rid of the
+       * opening bracket. The length covers the address, its new
+       * terminator and the byte behind it, which is still part
+       * of the duplicated string. p itself is not moved and keeps
+       * pointing at the first byte after the former bracket.
+       */
+      memmove(*hostname, (*hostname + 1), (size_t)(p - *hostname));
+      if (*p == ':')
+      {
+         *port = (int)strtol(++p, NULL, 0);
+      }
+   }
+   else if (NULL != (p = strchr(*hostname, ':')))
+   {
+      /* Plain hostname: the first colon separates host and port. */
+      *p++ = '\0';
+      *port = (int)strtol(p, NULL, 0);
+   }
+
+   return JB_ERR_OK;
+
+}
+
+
/*
Local Variables:
tab-width: 3