-const char pcrs_rcs[] = "$Id: pcrs.c,v 1.1 2001/05/13 21:57:07 administrator Exp $";
+const char pcrs_rcs[] = "$Id: pcrs.c,v 1.6 2001/06/03 11:03:48 oes Exp $";
/*********************************************************************
*
- * File : $Source: /home/administrator/cvs/ijb/pcrs.c,v $
+ * File : $Source: /cvsroot/ijbswa/current/pcrs.c,v $
*
- * Purpose : This is the pre-pre-alpha realease of libpcrs. It is only
- * published at this (ugly) stage of development, because it is
+ * Purpose : This is the alpha release of libpcrs. It is only published
+ * at this early stage of development, because it is
* needed for a new feature in JunkBuster.
*
- * Apart from the code being quite a mess, no inconsistencies,
- * memory leaks or functional bugs **should** be present.
- *
- * While you ROTFL at the code, you could just as well mail me
- * (andreas@oesterhelt.org) with advice for improvement.
+ * While no inconsistencies, memory leaks or functional bugs
+ * are known at this time, there *could* be plenty ;-). Also,
+ * many pcre-specific options are not yet supported, and
+ * error handling needs improvement.
*
* pcrs is a supplement to the brilliant pcre library by Philip
* Hazel (ph10@cam.ac.uk) and adds Perl-style substitution. That
* Currently, there's no documentation besides comments and the
* source itself ;-)
*
- * Copyright : Written and copyright by andreas@oesterhelt.org
+ * Copyright : Written and Copyright (C) 2000 by Andreas Oesterhelt
+ * <andreas@oesterhelt.org>
+ *
+ * This program is free software; you can redistribute it
+ * and/or modify it under the terms of the GNU General
+ * Public License as published by the Free Software
+ * Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will
+ * be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A
+ * PARTICULAR PURPOSE. See the GNU General Public
+ * License for more details.
+ *
+ * The GNU General Public License should be included with
+ * this file. If not, you can view it at
+ * http://www.gnu.org/copyleft/gpl.html
+ * or write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Revisions :
* $Log: pcrs.c,v $
+ * Revision 1.6 2001/06/03 11:03:48 oes
+ * Makefile/in
+ *
+ * introduced cgi.c
+ *
+ * actions.c:
+ *
+ * adapted to new enlist_unique arg format
+ *
+ * conf loadcfg.c
+ *
+ * introduced confdir option
+ *
+ * filters.c filters.h
+ *
+ * extracted-CGI relevant stuff
+ *
+ * jbsockets.c
+ *
+ * filled comment
+ *
+ * jcc.c
+ *
+ * support for new cgi mechanism
+ *
+ * list.c list.h
+ *
+ * functions for new list type: "map"
+ * extended enlist_unique
+ *
+ * miscutil.c .h
+ * introduced bindup()
+ *
+ * parsers.c parsers.h
+ *
+ * deleted const struct interceptors
+ *
+ * pcrs.c
+ * added FIXME
+ *
+ * project.h
+ *
+ * added struct map
+ * added struct http_response
+ * changed struct interceptors to struct cgi_dispatcher
+ * moved HTML stuff to cgi.h
+ *
+ * re_filterfile:
+ *
+ * changed
+ *
+ * showargs.c
+ * NO TIME LEFT
+ *
+ * Revision 1.5 2001/05/29 09:50:24 jongfoster
+ * Unified blocklist/imagelist/permissionslist.
+ * File format is still under discussion, but the internal changes
+ * are (mostly) done.
+ *
+ * Also modified interceptor behaviour:
+ * - We now intercept all URLs beginning with one of the following
+ * prefixes (and *only* these prefixes):
+ * * http://i.j.b/
+ * * http://ijbswa.sf.net/config/
+ * * http://ijbswa.sourceforge.net/config/
+ * - New interceptors "home page" - go to http://i.j.b/ to see it.
+ * - Internal changes so that intercepted and fast redirect pages
+ * are not replaced with an image.
+ * - Interceptors now have the option to send a binary page direct
+ * to the client. (i.e. ijb-send-banner uses this)
+ * - Implemented show-url-info interceptor. (Which is why I needed
+ * the above interceptors changes - a typical URL is
+ * "http://i.j.b/show-url-info?url=www.somesite.com/banner.gif".
+ * The previous mechanism would not have intercepted that, and
+ * if it had been intercepted then it would have replaced
+ * it with an image.)
+ *
+ * Revision 1.4 2001/05/25 14:12:40 oes
+ * Fixed bug: Empty substitutes now detected
+ *
+ * Revision 1.3 2001/05/25 11:03:55 oes
+ * Added sanity check for NULL jobs to pcrs_exec_substitution
+ *
+ * Revision 1.2 2001/05/22 18:46:04 oes
+ *
+ * - Enabled filtering banners by size rather than URL
+ * by adding patterns that replace all standard banner
+ * sizes with the "Junkbuster" gif to the re_filterfile
+ *
+ * - Enabled filtering WebBugs by providing a pattern
+ * which kills all 1x1 images
+ *
+ * - Added support for PCRE_UNGREEDY behaviour to pcrs,
+ * which is selected by the (nonstandard and therefore
+ * capital) letter 'U' in the option string.
+ * It causes the quantifiers to be ungreedy by default.
+ * Appending a ? turns back to greedy (!).
+ *
+ * - Added a new interceptor ijb-send-banner, which
+ * sends back the "Junkbuster" gif. Without imagelist or
+ * MSIE detection support, or if tinygif = 1, or the
+ * URL isn't recognized as an imageurl, a lame HTML
+ * explanation is sent instead.
+ *
+ * - Added new feature, which permits blocking remote
+ * script redirects and firing back a local redirect
+ * to the browser.
+ * The feature is conditionally compiled, i.e. it
+ * can be disabled with --disable-fast-redirects,
+ * plus it must be activated by a "fast-redirects"
+ * line in the config file, has its own log level
+ * and of course wants to be displayed by show-proxy-args
+ * Note: Boy, all the #ifdefs in 1001 locations and
+ * all the fumbling with configure.in and acconfig.h
+ * were *way* more work than the feature itself :-(
+ *
+ * - Because a generic redirect template was needed for
+ * this, tinygif = 3 now uses the same.
+ *
+ * - Moved GIFs, and other static HTTP response templates
+ * to project.h
+ *
+ * - Some minor fixes
+ *
+ * - Removed some >400 CRs again (Jon, you really worked
+ * a lot! ;-)
+ *
+ * Revision 1.1.1.1 2001/05/15 13:59:02 oes
+ * Initial import of version 2.9.3 source tree
+ *
*
*********************************************************************/
\f
*********************************************************************/
int pcrs_compile_perl_options(char *optstring, int *globalflag)
{
- int i, rc = 0;
+ size_t i;
+ int rc = 0;
*globalflag = 0;
for (i=0; i < strlen(optstring); i++)
{
case 'o': break;
case 's': rc |= PCRE_DOTALL; break;
case 'x': rc |= PCRE_EXTENDED; break;
+ case 'U': rc |= PCRE_UNGREEDY; break;
default: break;
}
}
{
switch (i)
{
- /* We don't care about the command and assume 's' */
+ /* We don't care about the command and assume 's' */
case 0:
break;
/* The substitute */
case 2:
- newjob->substitute = pcrs_compile_replacement(token, errptr);
- if (newjob->substitute == NULL)
+ if ((newjob->substitute = pcrs_compile_replacement(token, errptr)) == NULL)
{
pcrs_free_job(newjob);
return NULL;
}
- break;
+ else
+ {
+ break;
+ }
/* The options */
case 3:
}
free(token);
+ /* We have a valid substitute? */
+ if (newjob->substitute == NULL)
+ {
+ *errptr = PCRS_ERR_CMDSYNTAX;
+ pcrs_free_job(newjob);
+ return NULL;
+ }
+
/* Compile the pattern */
newjob->pattern = pcre_compile(dummy, newjob->options, &error, errptr, NULL);
if (newjob->pattern == NULL)
* It is the caller's responsibility to free the result when
* it's no longer needed.
*
+ * FIXME: MUST HANDLE SUBJECTS THAT ARE LONGER THAN subject_length
+ * CORRECTLY! --oes
+ *
* Parameters :
* 1 : job = the pcrs_job to be executed
* 2 : subject = the subject (== original) string
pcrs_match matches[PCRS_MAX_MATCHES];
char *result_offset;
+
+ /* Sanity first */
+ if (job == NULL || job->pattern == NULL || job->substitute == NULL)
+ {
+ *result = NULL;
+ return(PCRS_ERR_BADJOB);
+ }
+
newsize=subject_length;
+
/* Find.. */
while ((submatches = pcre_exec(job->pattern, job->hints, subject, subject_length, offset, 0, offsets, 99)) > 0)
{
if (submatches < -1) return submatches; /* Pass pcre error through */
matches_found = i;
+
/* ..get memory ..*/
if ((*result = (char *)malloc(newsize)) == NULL) /* must be free()d by caller */
{
return PCRS_ERR_NOMEM;
}
+
/* ..and replace */
offset = 0;
result_offset = *result;