/* Revision-control identifier string for this source file. */
const char encode_rcs[] = "$Id: encode.c,v 1.22 2011/11/06 11:44:32 fabiankeil Exp $";
/*********************************************************************
 *
 * File        :  $Source: /cvsroot/ijbswa/current/encode.c,v $
 *
 * Purpose     :  Functions to encode and decode URLs, and also to
 *                encode cookies and HTML text.
 *
 * Copyright   :  Written by and Copyright (C) 2001 the
 *                Privoxy team. http://www.privoxy.org/
 *
 *                Based on the Internet Junkbuster originally written
 *                by and Copyright (C) 1997 Anonymous Coders and
 *                Junkbusters Corporation.  http://www.junkbusters.com
 *
 *                This program is free software; you can redistribute it
 *                and/or modify it under the terms of the GNU General
 *                Public License as published by the Free Software
 *                Foundation; either version 2 of the License, or (at
 *                your option) any later version.
 *
 *                This program is distributed in the hope that it will
 *                be useful, but WITHOUT ANY WARRANTY; without even the
 *                implied warranty of MERCHANTABILITY or FITNESS FOR A
 *                PARTICULAR PURPOSE.  See the GNU General Public
 *                License for more details.
 *
 *                The GNU General Public License should be included with
 *                this file.  If not, you can view it at
 *                http://www.gnu.org/copyleft/gpl.html
 *                or write to the Free Software Foundation, Inc., 59
 *                Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *
 *********************************************************************/
47 const char encode_h_rcs[] = ENCODE_H_VERSION;
/*
 * Maps special characters in a URL to their equivalent % codes.
 *
 * The table is indexed by the unsigned value of the character.
 * A NULL entry means the character is copied through unchanged:
 * that is the case for NUL, '*', '-', '.', '@', '_', the digits
 * and the ASCII letters. Every other byte value maps to its
 * upper-case "%XX" form.
 */
static const char * const url_code_map[256] = {
   /*   0 */ NULL,  "%01", "%02", "%03", "%04", "%05", "%06", "%07", "%08", "%09",
   /*  10 */ "%0A", "%0B", "%0C", "%0D", "%0E", "%0F", "%10", "%11", "%12", "%13",
   /*  20 */ "%14", "%15", "%16", "%17", "%18", "%19", "%1A", "%1B", "%1C", "%1D",
   /*  30 */ "%1E", "%1F", "%20", "%21", "%22", "%23", "%24", "%25", "%26", "%27",
   /*  40 */ "%28", "%29", NULL,  "%2B", "%2C", NULL,  NULL,  "%2F", NULL,  NULL,
   /*  50 */ NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  "%3A", "%3B",
   /*  60 */ "%3C", "%3D", "%3E", "%3F", NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
   /*  70 */ NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
   /*  80 */ NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
   /*  90 */ NULL,  "%5B", "%5C", "%5D", "%5E", NULL,  "%60", NULL,  NULL,  NULL,
   /* 100 */ NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
   /* 110 */ NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
   /* 120 */ NULL,  NULL,  NULL,  "%7B", "%7C", "%7D", "%7E", "%7F", "%80", "%81",
   /* 130 */ "%82", "%83", "%84", "%85", "%86", "%87", "%88", "%89", "%8A", "%8B",
   /* 140 */ "%8C", "%8D", "%8E", "%8F", "%90", "%91", "%92", "%93", "%94", "%95",
   /* 150 */ "%96", "%97", "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
   /* 160 */ "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", "%A8", "%A9",
   /* 170 */ "%AA", "%AB", "%AC", "%AD", "%AE", "%AF", "%B0", "%B1", "%B2", "%B3",
   /* 180 */ "%B4", "%B5", "%B6", "%B7", "%B8", "%B9", "%BA", "%BB", "%BC", "%BD",
   /* 190 */ "%BE", "%BF", "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
   /* 200 */ "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF", "%D0", "%D1",
   /* 210 */ "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", "%D8", "%D9", "%DA", "%DB",
   /* 220 */ "%DC", "%DD", "%DE", "%DF", "%E0", "%E1", "%E2", "%E3", "%E4", "%E5",
   /* 230 */ "%E6", "%E7", "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
   /* 240 */ "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", "%F8", "%F9",
   /* 250 */ "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
};
/*
 * Maps special characters in HTML to their equivalent entities.
 *
 * The table is indexed by the unsigned value of the character.
 * Only '"', '&', '\'', '<' and '>' have a replacement; a NULL
 * entry means the character is copied through unchanged.
 * The longest replacement ("&quot;") is 6 characters long.
 */
static const char * const html_code_map[256] = {
   /*   0 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  10 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  20 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  30 */ NULL, NULL, NULL, NULL, "&quot;", NULL, NULL, NULL, "&amp;", "&#39;",
   /*  40 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  50 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  60 */ "&lt;", NULL, "&gt;", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  70 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  80 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  90 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 100 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 110 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 120 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 130 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 140 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 150 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 160 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 170 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 180 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 190 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 200 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 210 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 220 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 230 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 240 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 250 */ NULL, NULL, NULL, NULL, NULL, NULL
};
110 /*********************************************************************
112 * Function : html_encode
114 * Description : Encodes a string so it's not interpreted as
115 * containing HTML tags or entities.
116 * Replaces <, >, &, and " with the appropriate HTML
120 * 1 : s = String to encode. Null-terminated.
122 * Returns : Encoded string, newly allocated on the heap.
123 * Caller is responsible for freeing it with free().
124 * If s is NULL, or on out-of memory, returns NULL.
126 *********************************************************************/
127 char * html_encode(const char *s)
137 /* each input char can expand to at most 6 chars */
138 buf_size = (strlen(s) * 6) + 1;
139 buf = (char *) malloc(buf_size);
145 while ( (c = *s++) != '\0')
147 const char * replace_with = html_code_map[(unsigned char) c];
148 if(replace_with != NULL)
150 const size_t bytes_written = (size_t)(p - buf);
151 assert(bytes_written < buf_size);
152 p += strlcpy(p, replace_with, buf_size - bytes_written);
163 assert(strlen(buf) < buf_size);
/*********************************************************************
 *
 * Function    :  html_encode_and_free_original
 *
 * Description :  Encodes a string so it's not interpreted as
 *                containing HTML tags or entities.
 *                Does the same replacements as html_encode() and
 *                free()s the original string afterwards.
 *                If original string is NULL, simply returns NULL.
 *
 * Parameters  :
 *          1  :  s = String to encode.  Null-terminated.
 *                    Ownership passes to this function; the
 *                    caller must not use s after the call.
 *
 * Returns     :  Encoded string, newly allocated on the heap.
 *                Caller is responsible for freeing it with free().
 *                If s is NULL, or on out-of memory, returns NULL.
 *
 *********************************************************************/
char * html_encode_and_free_original(char *s)
{
   /*
    * No explicit NULL check is needed: html_encode() is documented
    * to return NULL for a NULL argument and free(NULL) is a no-op.
    */
   char *result = html_encode(s);

   /* The original is released even if the encoding failed. */
   free(s);

   return result;
}
202 /*********************************************************************
204 * Function : url_encode
206 * Description : Encodes a string so it can be used in a URL
207 * query string. Replaces special characters with
208 * the appropriate %xx codes.
210 * XXX: url_query_encode() would be a more fitting
214 * 1 : s = String to encode. Null-terminated.
216 * Returns : Encoded string, newly allocated on the heap.
217 * Caller is responsible for freeing it with free().
218 * If s is NULL, or on out-of memory, returns NULL.
220 *********************************************************************/
221 char * url_encode(const char *s)
231 /* each input char can expand to at most 3 chars */
232 buf_size = (strlen(s) * 3) + 1;
233 buf = (char *) malloc(buf_size);
239 while( (c = *s++) != '\0')
241 const char * replace_with = url_code_map[(unsigned char) c];
242 if (replace_with != NULL)
244 const size_t bytes_written = (size_t)(p - buf);
245 assert(bytes_written < buf_size);
246 p += strlcpy(p, replace_with, buf_size - bytes_written);
258 assert(strlen(buf) < buf_size);
/*********************************************************************
 *
 * Function    :  xdtoi
 *
 * Description :  Converts a single hex digit to an integer.
 *
 * Parameters  :
 *          1  :  d = in the range of ['0'..'9', 'A'..'F', 'a'..'f']
 *
 * Returns     :  The integer value, or -1 for non-hex characters.
 *
 *********************************************************************/
static int xdtoi(const int d)
{
   if ((d >= '0') && (d <= '9'))
   {
      return d - '0';
   }

   if ((d >= 'a') && (d <= 'f'))
   {
      return d - 'a' + 10;
   }

   if ((d >= 'A') && (d <= 'F'))
   {
      return d - 'A' + 10;
   }

   /* Anything else, including NUL and negative values, is not a hex digit. */
   return -1;
}
/*********************************************************************
 *
 * Function    :  xtoi
 *
 * Description :  Hex string to integer conversion.
 *
 * Parameters  :
 *          1  :  s = a 2 digit hex string (e.g. "1f").  Only the
 *                    first two characters will be looked at.
 *
 * Returns     :  The integer value, or 0 for non-hex strings.
 *
 *********************************************************************/
int xtoi(const char *s)
{
   int d2;
   const int d1 = xdtoi(*s);

   if (d1 < 0)
   {
      /*
       * Stop before looking at the second character: if *s is the
       * terminating NUL, s[1] must not be read.
       */
      return 0;
   }

   d2 = xdtoi(*(s+1));
   if (d2 < 0)
   {
      return 0;
   }

   return (d1 << 4) + d2;
}
/*********************************************************************
 *
 * Function    :  url_decode
 *
 * Description :  Decodes a URL query string, replacing %xx codes
 *                with their decoded form and '+' with a space.
 *                A '%' that is not followed by two hex digits, or
 *                that would decode to a NUL byte, is copied
 *                unchanged.
 *
 * Parameters  :
 *          1  :  s = String to decode.  Null-terminated.
 *
 * Returns     :  Decoded string, newly allocated on the heap.
 *                Caller is responsible for freeing it with free().
 *                If s is NULL, or on out-of memory, returns NULL.
 *
 *********************************************************************/
char *url_decode(const char * s)
{
   char *buf;
   char *q;

   if (s == NULL)
   {
      /* strlen(NULL) would be undefined behaviour. */
      return NULL;
   }

   /* The decoded string is never longer than the encoded one. */
   buf = malloc(strlen(s) + 1);
   if (buf == NULL)
   {
      return NULL;
   }

   q = buf;
   while (*s != '\0')
   {
      if (*s == '+')
      {
         s++;
         *q++ = ' ';
      }
      else if (*s == '%')
      {
         /*
          * xtoi() returns 0 for anything that isn't two hex digits,
          * which also keeps "%00" from smuggling a NUL byte into
          * the result.
          */
         const char decoded = (char)xtoi(s + 1);

         if (decoded != '\0')
         {
            *q++ = decoded;
            s += 3;
         }
         else
         {
            /* malformed, just use it */
            *q++ = *s++;
         }
      }
      else
      {
         *q++ = *s++;
      }
   }
   *q = '\0';

   return buf;
}
383 /*********************************************************************
385 * Function : percent_encode_url
387 * Description : Percent-encodes a string so it no longer contains
388 * any characters that aren't valid in an URL according
391 * XXX: Do not confuse with encode_url()
394 * 1 : s = String to encode. Null-terminated.
396 * Returns : Encoded string, newly allocated on the heap.
397 * Caller is responsible for freeing it with free().
398 * If s is NULL, or on out-of memory, returns NULL.
400 *********************************************************************/
401 char *percent_encode_url(const char *s)
403 static const char allowed_characters[128] = {
404 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
405 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
406 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
407 '\0', '\0', '\0', '!', '\0', '#', '$', '%', '&', '\'',
408 '(', ')', '*', '+', ',', '-', '.', '/', '0', '1',
409 '2', '3', '4', '5', '6', '7', '8', '9', ':', ';',
410 '\0', '=', '\0', '?', '@', 'A', 'B', 'C', 'D', 'E',
411 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
412 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y',
413 'Z', '[', '\0', ']', '\0', '_', '\0', 'a', 'b', 'c',
414 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
415 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
416 'x', 'y', 'z', '\0', '\0', '\0', '~', '\0'
423 /* Each input char can expand to at most 3 chars. */
424 buf_size = (strlen(s) * 3) + 1;
425 buf = (char *)malloc(buf_size);
431 while((c = *s++) != '\0')
433 const unsigned int i = (unsigned char)c;
434 if (i >= sizeof(allowed_characters) || '\0' == allowed_characters[i])
436 const char *replace_with = url_code_map[i];
437 assert(replace_with != NULL);
438 if (replace_with != NULL)
440 const size_t bytes_written = (size_t)(p - buf);
441 assert(bytes_written < buf_size);
442 p += strlcpy(p, replace_with, buf_size - bytes_written);
453 assert(strlen(buf) < buf_size);