X-Git-Url: http://www.privoxy.org/gitweb/?p=privoxy.git;a=blobdiff_plain;f=pcrs.c;h=4baaf72b8119a37b8ef649593d3aac9042994f21;hp=cd0c94a8a181feb661df53e9839ec4de2e97eb14;hb=7367a58d13c72ef9978b46f416f9b4735981bdfb;hpb=c75584ebcc79f939fb4ec9c8f842cef6692640c7 diff --git a/pcrs.c b/pcrs.c index cd0c94a8..4baaf72b 100644 --- a/pcrs.c +++ b/pcrs.c @@ -1,18 +1,17 @@ -const char pcrs_rcs[] = "$Id: pcrs.c,v 1.1 2001/05/13 21:57:07 administrator Exp $"; +const char pcrs_rcs[] = "$Id: pcrs.c,v 1.6 2001/06/03 11:03:48 oes Exp $"; /********************************************************************* * - * File : $Source: /home/administrator/cvs/ijb/pcrs.c,v $ + * File : $Source: /cvsroot/ijbswa/current/pcrs.c,v $ * - * Purpose : This is the pre-pre-alpha realease of libpcrs. It is only - * published at this (ugly) stage of development, because it is + * Purpose : This is the alpha release of libpcrs. It is only published + * at this early stage of development, because it is * needed for a new feature in JunkBuster. * - * Apart from the code being quite a mess, no inconsistencies, - * memory leaks or functional bugs **should** be present. - * - * While you ROTFL at the code, you could just as well mail me - * (andreas@oesterhelt.org) with advice for improvement. + * While no inconsistencies, memory leaks or functional bugs + * are known at this time, there *could* be plenty ;-). Also, + * Many pcre-specific options are not yet supported, and + * error handling needs improvement. * * pcrs is a supplement to the brilliant pcre library by Philip * Hazel (ph10@cam.ac.uk) and adds Perl-style substitution. 
That @@ -21,10 +20,159 @@ const char pcrs_rcs[] = "$Id: pcrs.c,v 1.1 2001/05/13 21:57:07 administrator Exp * Currently, there's no documentation besides comments and the * source itself ;-) * - * Copyright : Written and copyright by andreas@oesterhelt.org + * Copyright : Written and Copyright (C) 2000 by Andreas Oesterhelt + * + * + * This program is free software; you can redistribute it + * and/or modify it under the terms of the GNU General + * Public License as published by the Free Software + * Foundation; either version 2 of the License, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will + * be useful, but WITHOUT ANY WARRANTY; without even the + * implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU General Public + * License for more details. + * + * The GNU General Public License should be included with + * this file. If not, you can view it at + * http://www.gnu.org/copyleft/gpl.html + * or write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* * Revisions : * $Log: pcrs.c,v $ + * Revision 1.6 2001/06/03 11:03:48 oes + * Makefile/in + * + * introduced cgi.c + * + * actions.c: + * + * adapted to new enlist_unique arg format + * + * conf loadcfg.c + * + * introduced confdir option + * + * filters.c filters.h + * + * extracted CGI-relevant stuff + * + * jbsockets.c + * + * filled comment + * + * jcc.c + * + * support for new cgi mechanism + * + * list.c list.h + * + * functions for new list type: "map" + * extended enlist_unique + * + * miscutil.c .h + * introduced bindup() + * + * parsers.c parsers.h + * + * deleted const struct interceptors + * + * pcrs.c + * added FIXME + * + * project.h + * + * added struct map + * added struct http_response + * changes struct interceptors to struct cgi_dispatcher + * moved HTML stuff to cgi.h + * + * re_filterfile: + * + * changed + * + * showargs.c + * NO TIME LEFT + * + * Revision 1.5 2001/05/29 09:50:24 jongfoster + * Unified blocklist/imagelist/permissionslist. + * File format is still under discussion, but the internal changes + * are (mostly) done. + * + * Also modified interceptor behaviour: + * - We now intercept all URLs beginning with one of the following + * prefixes (and *only* these prefixes): + * * http://i.j.b/ + * * http://ijbswa.sf.net/config/ + * * http://ijbswa.sourceforge.net/config/ + * - New interceptors "home page" - go to http://i.j.b/ to see it. + * - Internal changes so that intercepted and fast redirect pages + * are not replaced with an image. + * - Interceptors now have the option to send a binary page direct + * to the client. (i.e. ijb-send-banner uses this) + * - Implemented show-url-info interceptor. (Which is why I needed + * the above interceptors changes - a typical URL is + * "http://i.j.b/show-url-info?url=www.somesite.com/banner.gif". + * The previous mechanism would not have intercepted that, and + * if it had been intercepted then it would have replaced + * it with an image.) 
+ * + * Revision 1.4 2001/05/25 14:12:40 oes + * Fixed bug: Empty substitutes now detected + * + * Revision 1.3 2001/05/25 11:03:55 oes + * Added sanity check for NULL jobs to pcrs_exec_substitution + * + * Revision 1.2 2001/05/22 18:46:04 oes + * + * - Enabled filtering banners by size rather than URL + * by adding patterns that replace all standard banner + * sizes with the "Junkbuster" gif to the re_filterfile + * + * - Enabled filtering WebBugs by providing a pattern + * which kills all 1x1 images + * + * - Added support for PCRE_UNGREEDY behaviour to pcrs, + * which is selected by the (nonstandard and therefore + * capital) letter 'U' in the option string. + * It causes the quantifiers to be ungreedy by default. + * Appending a ? turns back to greedy (!). + * + * - Added a new interceptor ijb-send-banner, which + * sends back the "Junkbuster" gif. Without imagelist or + * MSIE detection support, or if tinygif = 1, or the + * URL isn't recognized as an imageurl, a lame HTML + * explanation is sent instead. + * + * - Added new feature, which permits blocking remote + * script redirects and firing back a local redirect + * to the browser. + * The feature is conditionally compiled, i.e. it + * can be disabled with --disable-fast-redirects, + * plus it must be activated by a "fast-redirects" + * line in the config file, has its own log level + * and of course wants to be displayed by show-proxy-args + * Note: Boy, all the #ifdefs in 1001 locations and + * all the fumbling with configure.in and acconfig.h + * were *way* more work than the feature itself :-( + * + * - Because a generic redirect template was needed for + * this, tinygif = 3 now uses the same. + * + * - Moved GIFs, and other static HTTP response templates + * to project.h + * + * - Some minor fixes + * + * - Removed some >400 CRs again (Jon, you really worked + * a lot! 
;-) + * + * Revision 1.1.1.1 2001/05/15 13:59:02 oes + * Initial import of version 2.9.3 source tree + * * *********************************************************************/ @@ -108,7 +256,8 @@ int my_strsep(char *token, char **text, char delimiter, char quote_char) *********************************************************************/ int pcrs_compile_perl_options(char *optstring, int *globalflag) { - int i, rc = 0; + size_t i; + int rc = 0; *globalflag = 0; for (i=0; i < strlen(optstring); i++) { @@ -121,6 +270,7 @@ int pcrs_compile_perl_options(char *optstring, int *globalflag) case 'o': break; case 's': rc |= PCRE_DOTALL; break; case 'x': rc |= PCRE_EXTENDED; break; + case 'U': rc |= PCRE_UNGREEDY; break; default: break; } } @@ -321,7 +471,7 @@ pcrs_job *pcrs_make_job(char *command, int *errptr) { switch (i) { - /* We don't care about the command and assume 's' */ + /* We don't care about the command and assume 's' */ case 0: break; @@ -332,13 +482,15 @@ pcrs_job *pcrs_make_job(char *command, int *errptr) /* The substitute */ case 2: - newjob->substitute = pcrs_compile_replacement(token, errptr); - if (newjob->substitute == NULL) + if ((newjob->substitute = pcrs_compile_replacement(token, errptr)) == NULL) { pcrs_free_job(newjob); return NULL; } - break; + else + { + break; + } /* The options */ case 3: @@ -356,6 +508,14 @@ pcrs_job *pcrs_make_job(char *command, int *errptr) } free(token); + /* We have a valid substitute? */ + if (newjob->substitute == NULL) + { + *errptr = PCRS_ERR_CMDSYNTAX; + pcrs_free_job(newjob); + return NULL; + } + /* Compile the pattern */ newjob->pattern = pcre_compile(dummy, newjob->options, &error, errptr, NULL); if (newjob->pattern == NULL) @@ -435,6 +595,9 @@ pcrs_job *create_pcrs_job(pcre *pattern, pcre_extra *hints, int options, int glo * It is the caller's responsibility to free the result when * it's no longer needed. * + * FIXME: MUST HANDLE SUBJECTS THAT ARE LONGER THAN subject_length + CORRECTLY! 
--oes + * * Parameters : * 1 : job = the pcrs_job to be executed * 2 : subject = the subject (== original) string @@ -453,8 +616,17 @@ int pcrs_exec_substitution(pcrs_job *job, char *subject, int subject_length, cha pcrs_match matches[PCRS_MAX_MATCHES]; char *result_offset; + + /* Sanity first */ + if (job == NULL || job->pattern == NULL || job->substitute == NULL) + { + *result = NULL; + return(PCRS_ERR_BADJOB); + } + newsize=subject_length; + /* Find.. */ while ((submatches = pcre_exec(job->pattern, job->hints, subject, subject_length, offset, 0, offsets, 99)) > 0) { @@ -483,12 +655,14 @@ int pcrs_exec_substitution(pcrs_job *job, char *subject, int subject_length, cha if (submatches < -1) return submatches; /* Pass pcre error through */ matches_found = i; + /* ..get memory ..*/ if ((*result = (char *)malloc(newsize)) == NULL) /* must be free()d by caller */ { return PCRS_ERR_NOMEM; } + /* ..and replace */ offset = 0; result_offset = *result;