From 8c3e4f19001300c07cb076353eba625e4729fdd2 Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Sun, 6 Nov 2011 11:41:05 +0000 Subject: [PATCH] Add url_requires_percent_encoding() Checks if an URL contains invalid characters according to RFC 3986 that should be percent-encoded. --- urlmatch.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++- urlmatch.h | 6 ++++-- 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/urlmatch.c b/urlmatch.c index 3dead33b..5fe605c1 100644 --- a/urlmatch.c +++ b/urlmatch.c @@ -1,4 +1,4 @@ -const char urlmatch_rcs[] = "$Id: urlmatch.c,v 1.62 2011/09/04 11:10:56 fabiankeil Exp $"; +const char urlmatch_rcs[] = "$Id: urlmatch.c,v 1.63 2011/09/04 11:37:05 fabiankeil Exp $"; /********************************************************************* * * File : $Source: /cvsroot/ijbswa/current/urlmatch.c,v $ @@ -167,6 +167,53 @@ jb_err init_domain_components(struct http_request *http) #endif /* ndef FEATURE_EXTENDED_HOST_PATTERNS */ +/********************************************************************* + * + * Function : url_requires_percent_encoding + * + * Description : Checks if an URL contains invalid characters + * according to RFC 3986 that should be percent-encoded. + * Does not verify whether or not the passed string + * actually is a valid URL. 
+ * + * Parameters : + * 1 : url = URL to check + * + * Returns : True if the URL contains characters that must be percent-encoded, false otherwise + * + *********************************************************************/ +int url_requires_percent_encoding(const char *url) +{ + static const char allowed_characters[128] = { + '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', + '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', + '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', + '\0', '\0', '\0', '!', '\0', '#', '$', '%', '&', '\'', + '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', + '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', + '\0', '=', '\0', '?', '@', 'A', 'B', 'C', 'D', 'E', + 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', + 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', + 'Z', '[', '\0', ']', '\0', '_', '\0', 'a', 'b', 'c', + 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', + 'x', 'y', 'z', '\0', '\0', '\0', '~', '\0' + }; + + while (*url != '\0') + { + const unsigned int i = (unsigned char)*url++; + if (i >= sizeof(allowed_characters) || '\0' == allowed_characters[i]) + { + return TRUE; + } + } + + return FALSE; + +} + + /********************************************************************* * * Function : parse_http_url diff --git a/urlmatch.h b/urlmatch.h index d7a7a41b..cbf864de 100644 --- a/urlmatch.h +++ b/urlmatch.h @@ -1,6 +1,6 @@ #ifndef URLMATCH_H_INCLUDED #define URLMATCH_H_INCLUDED -#define URLMATCH_H_VERSION "$Id: urlmatch.h,v 1.16 2009/06/03 16:42:49 fabiankeil Exp $" +#define URLMATCH_H_VERSION "$Id: urlmatch.h,v 1.17 2011/09/04 11:10:56 fabiankeil Exp $" /********************************************************************* * * File : $Source: /cvsroot/ijbswa/current/urlmatch.h,v $ @@ -8,7 +8,7 @@ * Purpose : Declares functions to match URLs against URL * patterns. 
* - * Copyright : Written by and Copyright (C) 2001-2002, 2006 the SourceForge + * Copyright : Written by and Copyright (C) 2001-2002, 2006 the * Privoxy team. http://www.privoxy.org/ * * Based on the Internet Junkbuster originally written @@ -50,6 +50,8 @@ extern jb_err parse_http_request(const char *req, struct http_request *http); extern jb_err parse_http_url(const char *url, struct http_request *http, int require_protocol); +extern int url_requires_percent_encoding(const char *url); + #define REQUIRE_PROTOCOL 1 extern int url_match(const struct url_spec *pattern, -- 2.39.2