/* RCS identification string for this source file (expanded by CVS). */
const char encode_rcs[] = "$Id: encode.c,v 2.0 2002/06/04 14:34:21 jongfoster Exp $";
2 /*********************************************************************
4 * File : $Source: /cvsroot/ijbswa/current/src/encode.c,v $
6 * Purpose : Functions to encode and decode URLs, and also to
7 * encode cookies and HTML text.
9 * Copyright : Written by and Copyright (C) 2001 the SourceForge
10 * Privoxy team. http://www.privoxy.org/
12 * Based on the Internet Junkbuster originally written
13 * by and Copyright (C) 1997 Anonymous Coders and
14 * Junkbusters Corporation. http://www.junkbusters.com
16 * This program is free software; you can redistribute it
17 * and/or modify it under the terms of the GNU General
18 * Public License as published by the Free Software
19 * Foundation; either version 2 of the License, or (at
20 * your option) any later version.
22 * This program is distributed in the hope that it will
23 * be useful, but WITHOUT ANY WARRANTY; without even the
24 * implied warranty of MERCHANTABILITY or FITNESS FOR A
25 * PARTICULAR PURPOSE. See the GNU General Public
26 * License for more details.
28 * The GNU General Public License should be included with
29 * this file. If not, you can view it at
30 * http://www.gnu.org/copyleft/gpl.html
31 * or write to the Free Software Foundation, Inc., 59
32 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
36 * Revision 2.0 2002/06/04 14:34:21 jongfoster
37 * Moving source files to src/
39 * Revision 1.8 2002/03/26 22:29:54 swa
40 * we have a new homepage!
42 * Revision 1.7 2002/03/24 13:25:43 swa
43 * name change related issues
45 * Revision 1.6 2002/03/13 00:27:04 jongfoster
48 * Revision 1.5 2002/03/07 03:46:53 oes
49 * Fixed compiler warnings etc
51 * Revision 1.4 2002/01/22 23:28:07 jongfoster
52 * Adding convenience function html_encode_and_free_original()
53 * Making all functions accept NULL parameters - in this case, they
54 * simply return NULL. This allows error-checking to be deferred.
56 * Revision 1.3 2001/11/13 00:16:40 jongfoster
57 * Replacing references to malloc.h with the standard stdlib.h
58 * (See ANSI or K&R 2nd Ed)
60 * Revision 1.2 2001/05/17 22:52:35 oes
61 * - Cleaned CRLF's from the sources and related files
63 * Revision 1.1.1.1 2001/05/15 13:58:51 oes
64 * Initial import of version 2.9.3 source tree
67 *********************************************************************/
78 const char encode_h_rcs[] = ENCODE_H_VERSION;
/*
 * Maps special characters in a URL to their equivalent % codes.
 *
 * Indexed by the character's value converted to unsigned char.
 * A NULL entry means the character is emitted unchanged.  Note
 * that a space (32) maps to "+" rather than to "%20", and that
 * '*', '-', '.', '@', '_', digits and letters are passed through.
 * Entry 0 is never consulted by url_encode(), which stops at the
 * terminating NUL.
 */
static const char * const url_code_map[256] = {
   /*   0 */ NULL,  "%01", "%02", "%03", "%04", "%05", "%06", "%07", "%08", "%09",
   /*  10 */ "%0A", "%0B", "%0C", "%0D", "%0E", "%0F", "%10", "%11", "%12", "%13",
   /*  20 */ "%14", "%15", "%16", "%17", "%18", "%19", "%1A", "%1B", "%1C", "%1D",
   /*  30 */ "%1E", "%1F", "+",   "%21", "%22", "%23", "%24", "%25", "%26", "%27",
   /*  40 */ "%28", "%29", NULL,  "%2B", "%2C", NULL,  NULL,  "%2F", NULL,  NULL,
   /*  50 */ NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  "%3A", "%3B",
   /*  60 */ "%3C", "%3D", "%3E", "%3F", NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
   /*  70 */ NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
   /*  80 */ NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
   /*  90 */ NULL,  "%5B", "%5C", "%5D", "%5E", NULL,  "%60", NULL,  NULL,  NULL,
   /* 100 */ NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
   /* 110 */ NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
   /* 120 */ NULL,  NULL,  NULL,  "%7B", "%7C", "%7D", "%7E", "%7F", "%80", "%81",
   /* 130 */ "%82", "%83", "%84", "%85", "%86", "%87", "%88", "%89", "%8A", "%8B",
   /* 140 */ "%8C", "%8D", "%8E", "%8F", "%90", "%91", "%92", "%93", "%94", "%95",
   /* 150 */ "%96", "%97", "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
   /* 160 */ "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", "%A8", "%A9",
   /* 170 */ "%AA", "%AB", "%AC", "%AD", "%AE", "%AF", "%B0", "%B1", "%B2", "%B3",
   /* 180 */ "%B4", "%B5", "%B6", "%B7", "%B8", "%B9", "%BA", "%BB", "%BC", "%BD",
   /* 190 */ "%BE", "%BF", "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
   /* 200 */ "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF", "%D0", "%D1",
   /* 210 */ "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", "%D8", "%D9", "%DA", "%DB",
   /* 220 */ "%DC", "%DD", "%DE", "%DF", "%E0", "%E1", "%E2", "%E3", "%E4", "%E5",
   /* 230 */ "%E6", "%E7", "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
   /* 240 */ "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", "%F8", "%F9",
   /* 250 */ "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
};
/*
 * Maps special characters in HTML to their equivalent entities.
 *
 * Indexed by the character's value converted to unsigned char.
 * A NULL entry means the character is emitted unchanged.  Only
 * '"' (34), '&' (38), '<' (60) and '>' (62) are replaced.  The
 * longest replacement, "&quot;", is 6 characters, which is the
 * expansion factor html_encode() uses to size its buffer.
 */
static const char * const html_code_map[256] = {
   /*   0 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  10 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  20 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  30 */ NULL, NULL, NULL, NULL, "&quot;", NULL, NULL, NULL, "&amp;", NULL,
   /*  40 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  50 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  60 */ "&lt;", NULL, "&gt;", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  70 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  80 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  90 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 100 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 110 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 120 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 130 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 140 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 150 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 160 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 170 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 180 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 190 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 200 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 210 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 220 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 230 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 240 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 250 */ NULL, NULL, NULL, NULL, NULL, NULL
};
/*
 * Maps special characters in a cookie to their equivalent % codes.
 *
 * Indexed by the character's value converted to unsigned char.
 * A NULL entry means the character is emitted unchanged.  Only
 * three characters are replaced: space (32) becomes "+", ','
 * (44) becomes "%2C" and ';' (59) becomes "%3B".
 */
static const char * const cookie_code_map[256] = {
   /*   0 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  10 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  20 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  30 */ NULL, NULL, "+",  NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  40 */ NULL, NULL, NULL, NULL, "%2C", NULL, NULL, NULL, NULL, NULL,
   /*  50 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "%3B",
   /*  60 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  70 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  80 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  90 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 100 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 110 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 120 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 130 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 140 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 150 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 160 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 170 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 180 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 190 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 200 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 210 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 220 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 230 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 240 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 250 */ NULL, NULL, NULL, NULL, NULL, NULL
};
177 /*********************************************************************
179 * Function : html_encode
181 * Description : Encodes a string so it's not interpreted as
182 * containing HTML tags or entities.
183 * Replaces <, >, &, and " with the appropriate HTML
187 * 1 : s = String to encode. Null-terminated.
189 * Returns : Encoded string, newly allocated on the heap.
190 * Caller is responsible for freeing it with free().
191 * If s is NULL, or on out-of memory, returns NULL.
193 *********************************************************************/
194 char * html_encode(const char *s)
203 /* each input char can expand to at most 6 chars */
204 buf = (char *) malloc((strlen(s) * 6) + 1);
210 while ( (c = *s++) != '\0')
212 const char * replace_with = html_code_map[(unsigned char) c];
213 if(replace_with != NULL)
215 strcpy(p, replace_with);
216 p += strlen(replace_with);
/*********************************************************************
 *
 * Function    :  html_encode_and_free_original
 *
 * Description :  Encodes a string so it's not interpreted as
 *                containing HTML tags or entities.
 *                Replaces <, >, &, and " with the appropriate HTML
 *                entities.  Free()s original string.
 *                If original string is NULL, simply returns NULL.
 *
 * Parameters  :
 *          1  :  s = String to encode.  Null-terminated.
 *                    Must have been allocated with malloc(); it is
 *                    freed here even if encoding fails.
 *
 * Returns     :  Encoded string, newly allocated on the heap.
 *                Caller is responsible for freeing it with free().
 *                If s is NULL, or on out-of memory, returns NULL.
 *
 *********************************************************************/
char * html_encode_and_free_original(char *s)
{
   char * result;

   if (s == NULL)
   {
      return NULL;
   }

   result = html_encode(s);

   /* Ownership of s was passed to us, so release it on every path. */
   free(s);

   return result;
}
265 /*********************************************************************
267 * Function : cookie_encode
269 * Description : Encodes a string so it can be used in a cookie.
270 * Replaces " ", ",", and ";" with the appropriate
274 * 1 : s = String to encode. Null-terminated.
276 * Returns : Encoded string, newly allocated on the heap.
277 * Caller is responsible for freeing it with free().
278 * If s is NULL, or on out-of memory, returns NULL.
280 *********************************************************************/
281 char * cookie_encode(const char *s)
290 /* each input char can expand to at most 3 chars */
291 buf = (char *) malloc((strlen(s) * 3) + 1);
297 while ( (c = *s++) != '\0')
299 const char * replace_with = cookie_code_map[(unsigned char) c];
300 if (replace_with != NULL)
302 strcpy(p, replace_with);
303 p += strlen(replace_with);
317 /*********************************************************************
319 * Function : url_encode
321 * Description : Encodes a string so it can be used in a URL
322 * query string. Replaces special characters with
323 * the appropriate %xx codes.
326 * 1 : s = String to encode. Null-terminated.
328 * Returns : Encoded string, newly allocated on the heap.
329 * Caller is responsible for freeing it with free().
330 * If s is NULL, or on out-of memory, returns NULL.
332 *********************************************************************/
333 char * url_encode(const char *s)
342 /* each input char can expand to at most 3 chars */
343 buf = (char *) malloc((strlen(s) * 3) + 1);
349 while( (c = *s++) != '\0')
351 const char * replace_with = url_code_map[(unsigned char) c];
352 if (replace_with != NULL)
354 strcpy(p, replace_with);
355 p += strlen(replace_with);
/*********************************************************************
 *
 * Function    :  xdtoi
 *
 * Description :  Converts a single hex digit to an integer.
 *
 * Parameters  :
 *          1  :  d = in the range of ['0'..'9', 'A'..'F', 'a'..'f']
 *
 * Returns     :  The integer value, or -1 for non-hex characters.
 *
 *********************************************************************/
static int xdtoi(const int d)
{
   if ((d >= '0') && (d <= '9'))
   {
      return(d - '0');
   }
   else if ((d >= 'a') && (d <= 'f'))
   {
      return(d - 'a' + 10);
   }
   else if ((d >= 'A') && (d <= 'F'))
   {
      return(d - 'A' + 10);
   }
   else
   {
      return(-1);
   }
}


/*********************************************************************
 *
 * Function    :  xtoi
 *
 * Description :  Hex string to integer conversion.
 *
 * Parameters  :
 *          1  :  s = a 2 digit hex string (e.g. "1f").  Only the
 *                    first two characters will be looked at.
 *
 * Returns     :  The integer value, or 0 for non-hex strings.
 *
 *********************************************************************/
static int xtoi(const char *s)
{
   int d1;
   int d2;

   d1 = xdtoi(*s);
   if (d1 < 0)
   {
      /*
       * Stop before touching s[1]: if s[0] is the terminator,
       * s[1] is past the end of the string.
       */
      return 0;
   }

   d2 = xdtoi(*(s + 1));
   if (d2 < 0)
   {
      return 0;
   }

   return (d1 << 4) + d2;
}
435 /*********************************************************************
437 * Function : url_decode
439 * Description : Decodes a URL query string, replacing %xx codes
440 * with their decoded form.
443 * 1 : s = String to decode. Null-terminated.
445 * Returns : Decoded string, newly allocated on the heap.
446 * Caller is responsible for freeing it with free().
448 *********************************************************************/
449 char *url_decode(const char * s)
451 char *buf = malloc(strlen(s) + 1);
466 if ((*q = xtoi(s + 1)) != '\0')
473 /* malformed, just use it */