X-Git-Url: http://www.privoxy.org/gitweb/misc.html?a=blobdiff_plain;f=encode.c;h=6cb57cf5aa72c9bab88651c78f6f567fb8e1f718;hb=8fbaec9db37a4b73ae23727e60ddac591d2fc911;hp=2ca9fad9c8ceb77e249bb78bf3500e859b152a1f;hpb=605576ce35e16c57567f79dd9086bb9ae001753b;p=privoxy.git
diff --git a/encode.c b/encode.c
index 2ca9fad9..6cb57cf5 100644
--- a/encode.c
+++ b/encode.c
@@ -1,4 +1,3 @@
-const char encode_rcs[] = "$Id: encode.c,v 1.2 2001/05/17 22:52:35 oes Exp $";
/*********************************************************************
*
* File : $Source: /cvsroot/ijbswa/current/encode.c,v $
@@ -6,14 +5,14 @@ const char encode_rcs[] = "$Id: encode.c,v 1.2 2001/05/17 22:52:35 oes Exp $";
* Purpose : Functions to encode and decode URLs, and also to
* encode cookies and HTML text.
*
- * Copyright : Written by and Copyright (C) 2001 the SourceForge
- * IJBSWA team. http://ijbswa.sourceforge.net
+ * Copyright : Written by and Copyright (C) 2001 the
+ * Privoxy team. https://www.privoxy.org/
*
* Based on the Internet Junkbuster originally written
- * by and Copyright (C) 1997 Anonymous Coders and
+ * by and Copyright (C) 1997 Anonymous Coders and
* Junkbusters Corporation. http://www.junkbusters.com
*
- * This program is free software; you can redistribute it
+ * This program is free software; you can redistribute it
* and/or modify it under the terms of the GNU General
* Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at
@@ -31,43 +30,34 @@ const char encode_rcs[] = "$Id: encode.c,v 1.2 2001/05/17 22:52:35 oes Exp $";
* or write to the Free Software Foundation, Inc., 59
* Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
- * Revisions :
- * $Log: encode.c,v $
- * Revision 1.2 2001/05/17 22:52:35 oes
- * - Cleaned CRLF's from the sources and related files
- *
- * Revision 1.1.1.1 2001/05/15 13:58:51 oes
- * Initial import of version 2.9.3 source tree
- *
- *
*********************************************************************/
-
+
#include "config.h"
#include
#include
#include
+#include
+#include "miscutil.h"
#include "encode.h"
-const char encode_h_rcs[] = ENCODE_H_VERSION;
-
/* Maps special characters in a URL to their equivalent % codes. */
-static const char * const url_code_map[256] = {
- NULL, "%01", "%02", "%03", "%04", "%05", "%06", "%07", "%08", "%09",
+static const char url_code_map[256][4] = {
+ "", "%01", "%02", "%03", "%04", "%05", "%06", "%07", "%08", "%09",
"%0A", "%0B", "%0C", "%0D", "%0E", "%0F", "%10", "%11", "%12", "%13",
"%14", "%15", "%16", "%17", "%18", "%19", "%1A", "%1B", "%1C", "%1D",
- "%1E", "%1F", "+", "%21", "%22", "%23", "%24", "%25", "%26", "%27",
- "%28", "%29", NULL, "%2B", "%2C", NULL, NULL, "%2F", NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "%3A", "%3B",
- "%3C", "%3D", "%3E", "%3F", NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, "%5B", "%5C", "%5D", "%5E", NULL, "%60", NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, "%7B", "%7C", "%7D", "%7E", "%7F", "%80", "%81",
+ "%1E", "%1F", "%20", "%21", "%22", "%23", "%24", "%25", "%26", "%27",
+ "%28", "%29", "", "%2B", "%2C", "", "", "%2F", "", "",
+ "", "", "", "", "", "", "", "", "%3A", "%3B",
+ "%3C", "%3D", "%3E", "%3F", "", "", "", "", "", "",
+ "", "", "", "", "", "", "", "", "", "",
+ "", "", "", "", "", "", "", "", "", "",
+ "", "%5B", "%5C", "%5D", "%5E", "", "%60", "", "", "",
+ "", "", "", "", "", "", "", "", "", "",
+ "", "", "", "", "", "", "", "", "", "",
+ "", "", "", "%7B", "%7C", "%7D", "%7E", "%7F", "%80", "%81",
"%82", "%83", "%84", "%85", "%86", "%87", "%88", "%89", "%8A", "%8B",
"%8C", "%8D", "%8E", "%8F", "%90", "%91", "%92", "%93", "%94", "%95",
"%96", "%97", "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
@@ -83,12 +73,12 @@ static const char * const url_code_map[256] = {
"%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
};
-/* Maps special characters in HTML to their equivalent entites. */
+/* Maps special characters in HTML to their equivalent entities. */
static const char * const html_code_map[256] = {
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL,"&quot;",NULL,NULL,NULL,"&amp;",NULL,
+ NULL, NULL, NULL, NULL,"&quot;",NULL,NULL,NULL,"&amp;","&#39;",
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
"&lt;",NULL,"&gt;",NULL,NULL, NULL, NULL, NULL, NULL, NULL,
@@ -113,36 +103,6 @@ static const char * const html_code_map[256] = {
NULL, NULL, NULL, NULL, NULL, NULL
};
-/* Maps special characters in a cookie to their equivalent % codes. */
-static const char * const cookie_code_map[256] = {
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, "+", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, "%2C",NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "%3B",
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL
-};
-
/*********************************************************************
*
@@ -156,26 +116,37 @@ static const char * const cookie_code_map[256] = {
* Parameters :
* 1 : s = String to encode. Null-terminated.
*
- * Returns : Encoded string, newly allocated on the heap.
+ * Returns : Encoded string, newly allocated on the heap.
* Caller is responsible for freeing it with free().
+ * If s is NULL, or on out-of-memory, returns NULL.
*
*********************************************************************/
char * html_encode(const char *s)
{
+ char * buf;
+ size_t buf_size;
+
+ if (s == NULL)
+ {
+ return NULL;
+ }
+
/* each input char can expand to at most 6 chars */
- char * buf = (char *) malloc((strlen(s) * 6) + 1);
+ buf_size = (strlen(s) * 6) + 1;
+ buf = (char *) malloc(buf_size);
if (buf)
{
char c;
char * p = buf;
- while ( (c = *s++) != '\0')
+ while ((c = *s++) != '\0')
{
const char * replace_with = html_code_map[(unsigned char) c];
- if(replace_with != NULL)
+ if (replace_with != NULL)
{
- strcpy(p, replace_with);
- p += strlen(replace_with);
+ const size_t bytes_written = (size_t)(p - buf);
+ assert(bytes_written < buf_size);
+ p += strlcpy(p, replace_with, buf_size - bytes_written);
}
else
{
@@ -184,55 +155,48 @@ char * html_encode(const char *s)
}
*p = '\0';
+
+ assert(strlen(buf) < buf_size);
}
return(buf);
}
+
/*********************************************************************
*
- * Function : cookie_encode
+ * Function : html_encode_and_free_original
*
- * Description : Encodes a string so it can be used in a cookie.
- * Replaces " ", ",", and ";" with the appropriate
- * codes.
+ * Description : Encodes a string so it's not interpreted as
+ * containing HTML tags or entities.
+ * Replaces <, >, &, ", and ' with the appropriate HTML
+ * entities. Free()s original string.
+ * If original string is NULL, simply returns NULL.
*
* Parameters :
* 1 : s = String to encode. Null-terminated.
*
- * Returns : Encoded string, newly allocated on the heap.
+ * Returns : Encoded string, newly allocated on the heap.
* Caller is responsible for freeing it with free().
+ * If s is NULL, or on out-of-memory, returns NULL.
*
*********************************************************************/
-char * cookie_encode(const char *s)
+char * html_encode_and_free_original(char *s)
{
- /* each input char can expand to at most 3 chars */
- char * buf = (char *) malloc((strlen(s) * 3) + 1);
+ char * result;
- if (buf)
+ if (s == NULL)
{
- char c;
- char * p = buf;
- while ( (c = *s++) != '\0')
- {
- const char * replace_with = cookie_code_map[(unsigned char) c];
- if (replace_with != NULL)
- {
- strcpy(p, replace_with);
- p += strlen(replace_with);
- }
- else
- {
- *p++ = c;
- }
- }
-
- *p = '\0';
+ return NULL;
}
- return(buf);
+ result = html_encode(s);
+ free(s);
+
+ return result;
}
+
/*********************************************************************
*
* Function : url_encode
@@ -241,29 +205,43 @@ char * cookie_encode(const char *s)
* query string. Replaces special characters with
* the appropriate %xx codes.
*
+ * XXX: url_query_encode() would be a more fitting
+ * name.
+ *
* Parameters :
* 1 : s = String to encode. Null-terminated.
*
- * Returns : Encoded string, newly allocated on the heap.
+ * Returns : Encoded string, newly allocated on the heap.
* Caller is responsible for freeing it with free().
+ * If s is NULL, or on out-of-memory, returns NULL.
*
*********************************************************************/
char * url_encode(const char *s)
{
+ char * buf;
+ size_t buf_size;
+
+ if (s == NULL)
+ {
+ return NULL;
+ }
+
/* each input char can expand to at most 3 chars */
- char * buf = (char *) malloc((strlen(s) * 3) + 1);
+ buf_size = (strlen(s) * 3) + 1;
+ buf = (char *) malloc(buf_size);
if (buf)
{
char c;
char * p = buf;
- while( (c = *s++) != '\0')
+ while((c = *s++) != '\0')
{
- const char * replace_with = url_code_map[(unsigned char) c];
- if (replace_with != NULL)
+ const char *replace_with = url_code_map[(unsigned char) c];
+ if (*replace_with != '\0')
{
- strcpy(p, replace_with);
- p += strlen(replace_with);
+ const size_t bytes_written = (size_t)(p - buf);
+ assert(bytes_written < buf_size);
+ p += strlcpy(p, replace_with, buf_size - bytes_written);
}
else
{
@@ -273,6 +251,7 @@ char * url_encode(const char *s)
*p = '\0';
+ assert(strlen(buf) < buf_size);
}
return(buf);
@@ -291,13 +270,13 @@ char * url_encode(const char *s)
* Returns : The integer value, or -1 for non-hex characters.
*
*********************************************************************/
-static int xdtoi(char d)
+static int xdtoi(const int d)
{
if ((d >= '0') && (d <= '9'))
{
return(d - '0');
}
- else if ((d >= 'a') && (d <= 'f'))
+ else if ((d >= 'a') && (d <= 'f'))
{
return(d - 'a' + 10);
}
@@ -325,15 +304,15 @@ static int xdtoi(char d)
* Returns : The integer value, or 0 for non-hex strings.
*
*********************************************************************/
-static int xtoi(const char *s)
+int xtoi(const char *s)
{
- int d1, d2;
+ int d1;
- d1 = xdtoi(*s++);
- if(d1 >= 0)
+ d1 = xdtoi(*s);
+ if (d1 >= 0)
{
- d2 = xdtoi(*s);
- if(d2 >= 0)
+ int d2 = xdtoi(*(s+1));
+ if (d2 >= 0)
{
return (d1 << 4) + d2;
}
@@ -353,7 +332,7 @@ static int xtoi(const char *s)
* Parameters :
* 1 : s = String to decode. Null-terminated.
*
- * Returns : Decoded string, newly allocated on the heap.
+ * Returns : Decoded string, newly allocated on the heap.
* Caller is responsible for freeing it with free().
*
*********************************************************************/
@@ -374,7 +353,7 @@ char *url_decode(const char * s)
break;
case '%':
- if ((*q = xtoi(s + 1)))
+ if ((*q = (char)xtoi(s + 1)) != '\0')
{
s += 3;
q++;
@@ -399,6 +378,83 @@ char *url_decode(const char * s)
}
+/*********************************************************************
+ *
+ * Function    :  percent_encode_url
+ *
+ * Description :  Percent-encodes a string so it no longer contains
+ *                any characters that aren't valid in a URL according
+ *                to RFC 3986. Bytes outside 7-bit ASCII are encoded too.
+ *
+ *                XXX: Do not confuse with url_encode()
+ *
+ * Parameters  :
+ *          1  :  s = String to encode. Null-terminated.
+ *
+ * Returns     :  Encoded string, newly allocated on the heap.
+ *                Caller is responsible for freeing it with free().
+ *                If s is NULL, or on out-of-memory, returns NULL.
+ *
+ *********************************************************************/
+char *percent_encode_url(const char *s)
+{
+   static const char allowed_characters[128] = {
+      '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
+      '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
+      '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
+      '\0', '\0', '\0', '!', '\0', '#', '$', '%', '&', '\'',
+      '(', ')', '*', '+', ',', '-', '.', '/', '0', '1',
+      '2', '3', '4', '5', '6', '7', '8', '9', ':', ';',
+      '\0', '=', '\0', '?', '@', 'A', 'B', 'C', 'D', 'E',
+      'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
+      'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y',
+      'Z', '[', '\0', ']', '\0', '_', '\0', 'a', 'b', 'c',
+      'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+      'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
+      'x', 'y', 'z', '\0', '\0', '\0', '~', '\0'
+   };
+   char *buf;
+   size_t buf_size;
+
+   if (s == NULL)
+   {
+      return NULL;
+   }
+
+   /* Each input char can expand to at most 3 chars. */
+   buf_size = (strlen(s) * 3) + 1;
+   buf = (char *)malloc(buf_size);
+   if (buf != NULL)
+   {
+      char c;
+      char *p = buf;
+      while ((c = *s++) != '\0')
+      {
+         const unsigned int i = (unsigned char)c;
+         if (i >= sizeof(allowed_characters) || '\0' == allowed_characters[i])
+         {
+            const char *replace_with = url_code_map[i];
+            /* url_code_map is expected to hold a code for every disallowed byte. */
+            assert(*replace_with != '\0');
+            if (*replace_with != '\0')
+            {
+               const size_t bytes_written = (size_t)(p - buf);
+               assert(bytes_written < buf_size);
+               p += strlcpy(p, replace_with, buf_size - bytes_written);
+            }
+         }
+         else
+         {
+            *p++ = c;
+         }
+      }
+      *p = '\0';
+      assert(strlen(buf) < buf_size);
+   }
+   return(buf);
+}
+
+
/*
Local Variables:
tab-width: 3