1 /*********************************************************************
3 * File : $Source: /cvsroot/ijbswa/current/encode.c,v $
5 * Purpose : Functions to encode and decode URLs, and also to
6 * encode cookies and HTML text.
8 * Copyright : Written by and Copyright (C) 2001 the
9 * Privoxy team. http://www.privoxy.org/
11 * Based on the Internet Junkbuster originally written
12 * by and Copyright (C) 1997 Anonymous Coders and
13 * Junkbusters Corporation. http://www.junkbusters.com
15 * This program is free software; you can redistribute it
16 * and/or modify it under the terms of the GNU General
17 * Public License as published by the Free Software
18 * Foundation; either version 2 of the License, or (at
19 * your option) any later version.
21 * This program is distributed in the hope that it will
22 * be useful, but WITHOUT ANY WARRANTY; without even the
23 * implied warranty of MERCHANTABILITY or FITNESS FOR A
24 * PARTICULAR PURPOSE. See the GNU General Public
25 * License for more details.
27 * The GNU General Public License should be included with
28 * this file. If not, you can view it at
29 * http://www.gnu.org/copyleft/gpl.html
30 * or write to the Free Software Foundation, Inc., 59
31 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
33 *********************************************************************/
46 /* Maps special characters in a URL to their equivalent % codes.
 *
 * The table is indexed by byte value (0-255).  Every entry is either
 * an empty string or a three-character "%XX" escape with upper-case
 * hex digits; together with the terminating NUL that is why the inner
 * dimension is 4.
 *
 * Entries that are empty: index 0, '*', '-', '.', '0'-'9', '@',
 * 'A'-'Z', '_' and 'a'-'z'.  url_encode() only substitutes an entry
 * when it is non-empty; percent_encode_url() looks up the codes for
 * the bytes it decides to escape.
 */
47 static const char url_code_map[256][4] = {
48 "", "%01", "%02", "%03", "%04", "%05", "%06", "%07", "%08", "%09", /* 0 - 9 */
49 "%0A", "%0B", "%0C", "%0D", "%0E", "%0F", "%10", "%11", "%12", "%13", /* 10 - 19 */
50 "%14", "%15", "%16", "%17", "%18", "%19", "%1A", "%1B", "%1C", "%1D", /* 20 - 29 */
51 "%1E", "%1F", "%20", "%21", "%22", "%23", "%24", "%25", "%26", "%27", /* 30 - 39; 32 is the space */
52 "%28", "%29", "", "%2B", "%2C", "", "", "%2F", "", "", /* 40 - 49; '*', '-', '.', '0', '1' stay as-is */
53 "", "", "", "", "", "", "", "", "%3A", "%3B", /* 50 - 59; '2'-'9' stay as-is */
54 "%3C", "%3D", "%3E", "%3F", "", "", "", "", "", "", /* 60 - 69; '@', 'A'-'E' stay as-is */
55 "", "", "", "", "", "", "", "", "", "", /* 70 - 79; 'F'-'O' */
56 "", "", "", "", "", "", "", "", "", "", /* 80 - 89; 'P'-'Y' */
57 "", "%5B", "%5C", "%5D", "%5E", "", "%60", "", "", "", /* 90 - 99; 'Z', '_', 'a'-'c' stay as-is */
58 "", "", "", "", "", "", "", "", "", "", /* 100 - 109; 'd'-'m' */
59 "", "", "", "", "", "", "", "", "", "", /* 110 - 119; 'n'-'w' */
60 "", "", "", "%7B", "%7C", "%7D", "%7E", "%7F", "%80", "%81", /* 120 - 129; 'x'-'z' stay as-is */
61 "%82", "%83", "%84", "%85", "%86", "%87", "%88", "%89", "%8A", "%8B", /* 130 - 139 */
62 "%8C", "%8D", "%8E", "%8F", "%90", "%91", "%92", "%93", "%94", "%95", /* 140 - 149 */
63 "%96", "%97", "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F", /* 150 - 159 */
64 "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", "%A8", "%A9", /* 160 - 169 */
65 "%AA", "%AB", "%AC", "%AD", "%AE", "%AF", "%B0", "%B1", "%B2", "%B3", /* 170 - 179 */
66 "%B4", "%B5", "%B6", "%B7", "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", /* 180 - 189 */
67 "%BE", "%BF", "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7", /* 190 - 199 */
68 "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF", "%D0", "%D1", /* 200 - 209 */
69 "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", "%D8", "%D9", "%DA", "%DB", /* 210 - 219 */
70 "%DC", "%DD", "%DE", "%DF", "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", /* 220 - 229 */
71 "%E6", "%E7", "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF", /* 230 - 239 */
72 "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", "%F8", "%F9", /* 240 - 249 */
73 "%FA", "%FB", "%FC", "%FD", "%FE", "%FF" /* 250 - 255 */
/*
 * Maps special characters in HTML to their equivalent entities.
 *
 * The table is indexed by byte value (0-255).  A NULL entry means
 * that no entity is defined for that byte.  Entities are defined
 * for '"' (&quot;), '&' (&amp;), '\'' (&#39;), '<' (&lt;) and
 * '>' (&gt;).
 *
 * The longest entity is six characters long, which is the expansion
 * factor html_encode() uses when sizing its output buffer.  Keep the
 * two in sync when adding entries.
 */
static const char * const html_code_map[256] = {
   NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   NULL, NULL, NULL, NULL, "&quot;", NULL, NULL, NULL, "&amp;", "&#39;", /* 34 '"', 38 '&', 39 '\'' */
   NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   "&lt;", NULL, "&gt;", NULL, NULL, NULL, NULL, NULL, NULL, NULL,       /* 60 '<', 62 '>' */
   NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   NULL, NULL, NULL, NULL, NULL, NULL
};
107 /*********************************************************************
109 * Function : html_encode
111 * Description : Encodes a string so it's not interpreted as
112 * containing HTML tags or entities.
113 * Replaces <, >, &, and " with the appropriate HTML
117 * 1 : s = String to encode. Null-terminated.
119 * Returns : Encoded string, newly allocated on the heap.
120 * Caller is responsible for freeing it with free().
121 * If s is NULL, or on out-of memory, returns NULL.
123 *********************************************************************/
124 char * html_encode(const char *s)
134 /* each input char can expand to at most 6 chars */
135 buf_size = (strlen(s) * 6) + 1;
136 buf = (char *) malloc(buf_size);
142 while ((c = *s++) != '\0')
144 const char * replace_with = html_code_map[(unsigned char) c];
145 if (replace_with != NULL)
147 const size_t bytes_written = (size_t)(p - buf);
148 assert(bytes_written < buf_size);
149 p += strlcpy(p, replace_with, buf_size - bytes_written);
159 assert(strlen(buf) < buf_size);
/*********************************************************************
 *
 * Function    :  html_encode_and_free_original
 *
 * Description :  Encodes a string so it's not interpreted as
 *                containing HTML tags or entities, using
 *                html_encode(), and then free()s the original
 *                string.
 *                If the original string is NULL, simply returns
 *                NULL.
 *
 * Parameters  :
 *          1  :  s = String to encode.  Null-terminated.
 *                    Ownership passes to this function; the caller
 *                    must not use s afterwards.
 *
 * Returns     :  Encoded string, newly allocated on the heap.
 *                Caller is responsible for freeing it with free().
 *                If s is NULL, or on out-of-memory, returns NULL.
 *                The original string is freed in either case.
 *
 *********************************************************************/
char * html_encode_and_free_original(char *s)
{
   char * result;

   if (s == NULL)
   {
      return NULL;
   }

   result = html_encode(s);

   /* The original is released even if encoding failed. */
   free(s);

   return result;
}
200 /*********************************************************************
202 * Function : url_encode
204 * Description : Encodes a string so it can be used in a URL
205 * query string. Replaces special characters with
206 * the appropriate %xx codes.
208 * XXX: url_query_encode() would be a more fitting
212 * 1 : s = String to encode. Null-terminated.
214 * Returns : Encoded string, newly allocated on the heap.
215 * Caller is responsible for freeing it with free().
216 * If s is NULL, or on out-of memory, returns NULL.
218 *********************************************************************/
219 char * url_encode(const char *s)
229 /* each input char can expand to at most 3 chars */
230 buf_size = (strlen(s) * 3) + 1;
231 buf = (char *) malloc(buf_size);
237 while((c = *s++) != '\0')
239 const char *replace_with = url_code_map[(unsigned char) c];
240 if (*replace_with != '\0')
242 const size_t bytes_written = (size_t)(p - buf);
243 assert(bytes_written < buf_size);
244 p += strlcpy(p, replace_with, buf_size - bytes_written);
254 assert(strlen(buf) < buf_size);
/*********************************************************************
 *
 * Function    :  xdtoi
 *
 * Description :  Converts a single hex digit to an integer.
 *
 * Parameters  :
 *          1  :  d = in the range of ['0'..'9', 'A'..'F', 'a'..'f']
 *
 * Returns     :  The integer value (0..15), or -1 for non-hex
 *                characters.
 *
 *********************************************************************/
static int xdtoi(const int d)
{
   if ((d >= '0') && (d <= '9'))
   {
      return(d - '0');
   }
   else if ((d >= 'a') && (d <= 'f'))
   {
      return(d - 'a' + 10);
   }
   else if ((d >= 'A') && (d <= 'F'))
   {
      return(d - 'A' + 10);
   }
   else
   {
      /* Not a hex digit. */
      return(-1);
   }
}
/*********************************************************************
 *
 * Function    :  xtoi
 *
 * Description :  Hex string to integer conversion.
 *
 * Parameters  :
 *          1  :  s = a 2 digit hex string (e.g. "1f").  Only the
 *                    first two characters will be looked at, and
 *                    the second one only if the first is a hex
 *                    digit.
 *
 * Returns     :  The integer value (0..255), or 0 for non-hex
 *                strings.  Note that "00" also yields 0, so the
 *                return value alone cannot tell the two apart.
 *
 *********************************************************************/
int xtoi(const char *s)
{
   int d1;

   d1 = xdtoi(*s);
   if (d1 >= 0)
   {
      int d2 = xdtoi(*(s+1));
      if (d2 >= 0)
      {
         /* High nibble first. */
         return (d1 << 4) + d2;
      }
   }

   return 0;
}
325 /*********************************************************************
 *
 * Function    :  url_decode
 *
 * Description :  Decodes a URL query string, replacing %xx codes
 *                with their decoded form.
 *
 * Parameters  :
 *          1  :  s = String to decode.  Null-terminated.
 *                    Must not be NULL: strlen(s) is evaluated in
 *                    the very first declaration.
 *
 * Returns     :  Decoded string, newly allocated on the heap.
 *                Caller is responsible for freeing it with free().
 *
 * NOTE(review):  Only fragments of the body are visible in this
 *                listing.  What happens when malloc() fails, and
 *                how characters other than '%' are handled, could
 *                not be confirmed from here -- verify against the
 *                full function before relying on it.
 *
 *********************************************************************/
339 char *url_decode(const char * s)
/*
 * The output buffer is sized like the input plus its terminator.
 * Replacing a three-character %xx code by a single character can
 * only make the string shorter.
 */
341 char *buf = malloc(strlen(s) + 1);
/*
 * xtoi() converts the two characters following the '%'.  The result
 * is stored through q and then tested: zero -- which xtoi() returns
 * for non-hex input, and which "%00" produces as well -- fails the
 * test and is treated as malformed.
 */
356 if ((*q = (char)xtoi(s + 1)) != '\0')
363 /* malformed, just use it (presumably the '%' is copied literally -- TODO confirm) */
381 /*********************************************************************
383 * Function : percent_encode_url
385 * Description : Percent-encodes a string so it no longer contains
386 * any characters that aren't valid in an URL according
389 * XXX: Do not confuse with encode_url()
392 * 1 : s = String to encode. Null-terminated.
394 * Returns : Encoded string, newly allocated on the heap.
395 * Caller is responsible for freeing it with free().
396 * If s is NULL, or on out-of memory, returns NULL.
398 *********************************************************************/
399 char *percent_encode_url(const char *s)
401 static const char allowed_characters[128] = {
402 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
403 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
404 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
405 '\0', '\0', '\0', '!', '\0', '#', '$', '%', '&', '\'',
406 '(', ')', '*', '+', ',', '-', '.', '/', '0', '1',
407 '2', '3', '4', '5', '6', '7', '8', '9', ':', ';',
408 '\0', '=', '\0', '?', '@', 'A', 'B', 'C', 'D', 'E',
409 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
410 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y',
411 'Z', '[', '\0', ']', '\0', '_', '\0', 'a', 'b', 'c',
412 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
413 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
414 'x', 'y', 'z', '\0', '\0', '\0', '~', '\0'
421 /* Each input char can expand to at most 3 chars. */
422 buf_size = (strlen(s) * 3) + 1;
423 buf = (char *)malloc(buf_size);
429 while ((c = *s++) != '\0')
431 const unsigned int i = (unsigned char)c;
432 if (i >= sizeof(allowed_characters) || '\0' == allowed_characters[i])
434 const char *replace_with = url_code_map[i];
435 assert(*replace_with != '\0');
436 if (*replace_with != '\0')
438 const size_t bytes_written = (size_t)(p - buf);
439 assert(bytes_written < buf_size);
440 p += strlcpy(p, replace_with, buf_size - bytes_written);
450 assert(strlen(buf) < buf_size);