-const char loaders_rcs[] = "$Id: loaders.c,v 1.14 2001/06/01 03:27:04 oes Exp $";
+const char loaders_rcs[] = "$Id: loaders.c,v 1.55 2006/09/07 10:25:39 fabiankeil Exp $";
/*********************************************************************
*
* File : $Source: /cvsroot/ijbswa/current/loaders.c,v $
*
* Purpose : Functions to load and unload the various
* configuration files. Also contains code to manage
- * the list of active loaders, and to automatically
+ * the list of active loaders, and to automatically
* unload files that are no longer in use.
*
* Copyright : Written by and Copyright (C) 2001 the SourceForge
- * IJBSWA team. http://ijbswa.sourceforge.net
+ * Privoxy team. http://www.privoxy.org/
*
* Based on the Internet Junkbuster originally written
- * by and Copyright (C) 1997 Anonymous Coders and
+ * by and Copyright (C) 1997 Anonymous Coders and
* Junkbusters Corporation. http://www.junkbusters.com
*
- * This program is free software; you can redistribute it
+ * This program is free software; you can redistribute it
* and/or modify it under the terms of the GNU General
* Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at
*
* Revisions :
* $Log: loaders.c,v $
+ * Revision 1.55 2006/09/07 10:25:39 fabiankeil
+ * Fix typo.
+ *
+ * Revision 1.54 2006/09/07 10:22:20 fabiankeil
+ * If too many trusted referrers are used,
+ * print only one error message instead of logging
+ * every single trusted referrer above the arbitrary
+ * limit.
+ *
+ * Revision 1.53 2006/08/31 16:25:06 fabiankeil
+ * Work around a buffer overflow that caused Privoxy to
+ * segfault if too many trusted referrers were used. Good
+ * enough for now, but should be replaced with a real
+ * solution after the next release.
+ *
+ * Revision 1.52 2006/07/18 14:48:46 david__schmidt
+ * Reorganizing the repository: swapping out what was HEAD (the old 3.1 branch)
+ * with what was really the latest development (the v_3_0_branch branch)
+ *
+ * Revision 1.50.2.8 2006/01/30 15:16:25 david__schmidt
+ * Remove a little residual debugging info
+ *
+ * Revision 1.50.2.7 2006/01/29 23:10:56 david__schmidt
+ * Multiple filter file support
+ *
+ * Revision 1.50.2.6 2003/10/24 10:17:54 oes
+ * Nit: Allowed tabs as separators in filter headings
+ *
+ * Revision 1.50.2.5 2003/05/08 15:19:15 oes
+ * sweep: Made loop structure of sweep step mirror that of mark step
+ *
+ * Revision 1.50.2.4 2003/05/06 15:57:12 oes
+ * Bugfix: Update last_active pointer in sweep() before
+ * leaving an active client. Closes bugs #724395, #727882
+ *
+ * Revision 1.50.2.3 2002/11/20 17:12:30 oes
+ * Ooops, forgot one change.
+ *
+ * Revision 1.50.2.2 2002/11/20 14:38:15 oes
+ * Fixed delayed/incomplete freeing of client resources and
+ * simplified loop structure in sweep.
+ * Thanks to Oliver Stoeneberg for the hint.
+ *
+ * Revision 1.50.2.1 2002/07/26 15:19:24 oes
+ * - PCRS jobs now chained in order of appearance. Previous
+ * reverse chaining was counter-intuitive.
+ * - Changed loglevel of PCRS job compile errors to
+ * LOG_LEVEL_ERROR
+ *
+ * Revision 1.50 2002/04/24 02:12:16 oes
+ * Jon's multiple AF patch: Sweep now takes care of all AFs
+ *
+ * Revision 1.49 2002/04/19 16:53:25 jongfoster
+ * Optimize away a function call by using an equivalent macro
+ *
+ * Revision 1.48 2002/04/05 00:56:09 gliptak
+ * Correcting typo to clean up on realloc failure
+ *
+ * Revision 1.47 2002/03/26 22:29:55 swa
+ * we have a new homepage!
+ *
+ * Revision 1.46 2002/03/24 13:25:43 swa
+ * name change related issues
+ *
+ * Revision 1.45 2002/03/16 23:54:06 jongfoster
+ * Adding graceful termination feature, to help look for memory leaks.
+ * If you enable this (which, by design, has to be done by hand
+ * editing config.h) and then go to http://i.j.b/die, then the program
+ * will exit cleanly after the *next* request. It should free all the
+ * memory that was used.
+ *
+ * Revision 1.44 2002/03/16 21:51:00 jongfoster
+ * Fixing free(NULL).
+ *
+ * Revision 1.43 2002/03/16 20:28:34 oes
+ * Added descriptions to the filters so users will know what they select in the cgi editor
+ *
+ * Revision 1.42 2002/03/13 00:27:05 jongfoster
+ * Killing warnings
+ *
+ * Revision 1.41 2002/03/12 01:42:50 oes
+ * Introduced modular filters
+ *
+ * Revision 1.40 2002/03/08 17:46:04 jongfoster
+ * Fixing int/size_t warnings
+ *
+ * Revision 1.39 2002/03/07 03:46:17 oes
+ * Fixed compiler warnings
+ *
+ * Revision 1.38 2002/03/06 22:54:35 jongfoster
+ * Automated function-comment nitpicking.
+ *
+ * Revision 1.37 2002/03/03 15:07:49 oes
+ * Re-enabled automatic config reloading
+ *
+ * Revision 1.36 2002/01/22 23:46:18 jongfoster
+ * Moving edit_read_line() and simple_read_line() to loaders.c, and
+ * extending them to support reading MS-DOS, Mac and UNIX style files
+ * on all platforms.
+ *
+ * Modifying read_config_line() (without changing it's prototype) to
+ * be a trivial wrapper for edit_read_line(). This means that we have
+ * one function to read a line and handle comments, which is common
+ * between the initialization code and the edit interface.
+ *
+ * Revision 1.35 2002/01/17 21:03:08 jongfoster
+ * Moving all our URL and URL pattern parsing code to urlmatch.c.
+ *
+ * Renaming free_url to free_url_spec, since it frees a struct url_spec.
+ *
+ * Revision 1.34 2001/12/30 14:07:32 steudten
+ * - Add signal handling (unix)
+ * - Add SIGHUP handler (unix)
+ * - Add creation of pidfile (unix)
+ * - Add action 'top' in rc file (RH)
+ * - Add entry 'SIGNALS' to manpage
+ * - Add exit message to logfile (unix)
+ *
+ * Revision 1.33 2001/11/13 00:16:38 jongfoster
+ * Replacing references to malloc.h with the standard stdlib.h
+ * (See ANSI or K&R 2nd Ed)
+ *
+ * Revision 1.32 2001/11/07 00:02:13 steudten
+ * Add line number in error output for lineparsing for
+ * actionsfile and configfile.
+ * Special handling for CLF added.
+ *
+ * Revision 1.31 2001/10/26 17:39:01 oes
+ * Removed csp->referrer
+ * Moved ijb_isspace and ijb_tolower to project.h
+ *
+ * Revision 1.30 2001/10/25 03:40:48 david__schmidt
+ * Change in porting tactics: OS/2's EMX porting layer doesn't allow multiple
+ * threads to call select() simultaneously. So, it's time to do a real, live,
+ * native OS/2 port. See defines for __EMX__ (the porting layer) vs. __OS2__
+ * (native). Both versions will work, but using __OS2__ offers multi-threading.
+ *
+ * Revision 1.29 2001/10/23 21:38:53 jongfoster
+ * Adding error-checking to create_url_spec()
+ *
+ * Revision 1.28 2001/10/07 15:40:39 oes
+ * Replaced 6 boolean members of csp with one bitmap (csp->flags)
+ *
+ * Revision 1.27 2001/09/22 16:36:59 jongfoster
+ * Removing unused parameter fs from read_config_line()
+ *
+ * Revision 1.26 2001/09/22 14:05:22 jongfoster
+ * Bugfix: Multiple escaped "#" characters in a configuration
+ * file are now permitted.
+ * Also removing 3 unused headers.
+ *
+ * Revision 1.25 2001/09/13 22:44:03 jongfoster
+ * Adding {} to an if statement
+ *
+ * Revision 1.24 2001/07/30 22:08:36 jongfoster
+ * Tidying up #defines:
+ * - All feature #defines are now of the form FEATURE_xxx
+ * - Permanently turned off WIN_GUI_EDIT
+ * - Permanently turned on WEBDAV and SPLIT_PROXY_ARGS
+ *
+ * Revision 1.23 2001/07/20 15:51:54 oes
+ * Fixed indentation of prepocessor commands
+ *
+ * Revision 1.22 2001/07/20 15:16:17 haroon
+ * - per Guy's suggestion, added a while loop in sweep() to catch not just
+ * the last inactive CSP but all other consecutive inactive CSPs after that
+ * as well
+ *
+ * Revision 1.21 2001/07/18 17:26:24 oes
+ * Changed to conform to new pcrs interface
+ *
+ * Revision 1.20 2001/07/17 13:07:01 oes
+ * Fixed segv when last line in config files
+ * lacked a terminating (\r)\n
+ *
+ * Revision 1.19 2001/07/13 14:01:54 oes
+ * Removed all #ifdef PCRS
+ *
+ * Revision 1.18 2001/06/29 21:45:41 oes
+ * Indentation, CRLF->LF, Tab-> Space
+ *
+ * Revision 1.17 2001/06/29 13:31:51 oes
+ * Various adaptions
+ *
+ * Revision 1.16 2001/06/09 10:55:28 jongfoster
+ * Changing BUFSIZ ==> BUFFER_SIZE
+ *
+ * Revision 1.15 2001/06/07 23:14:14 jongfoster
+ * Removing ACL and forward file loaders - these
+ * files have been merged into the config file.
+ * Cosmetic: Moving unloader funcs next to their
+ * respective loader funcs
+ *
* Revision 1.14 2001/06/01 03:27:04 oes
* Fixed line continuation problem
*
#include <stdlib.h>
#include <sys/types.h>
#include <string.h>
-#include <malloc.h>
#include <errno.h>
#include <sys/stat.h>
#include <ctype.h>
+#include <assert.h>
-#ifndef _WIN32
+#if !defined(_WIN32) && !defined(__OS2__)
#include <unistd.h>
#endif
#include "project.h"
#include "list.h"
#include "loaders.h"
-#include "encode.h"
#include "filters.h"
#include "parsers.h"
#include "jcc.h"
-#include "ssplit.h"
#include "miscutil.h"
#include "errlog.h"
-#include "gateway.h"
#include "actions.h"
-
-#ifndef SPLIT_PROXY_ARGS
-/* For strsav */
-#include "showargs.h"
-#endif /* ndef SPLIT_PROXY_ARGS */
+#include "urlmatch.h"
const char loaders_h_rcs[] = LOADERS_H_VERSION;
-/* Fix a problem with Solaris. There should be no effect on other
- * platforms.
- * Solaris's isspace() is a macro which uses it's argument directly
- * as an array index. Therefore we need to make sure that high-bit
- * characters generate +ve values, and ideally we also want to make
- * the argument match the declared parameter type of "int".
- */
-#define ijb_isspace(__X) isspace((int)(unsigned char)(__X))
-
-
/*
* Currently active files.
* These are also entered in the main linked list of files.
*/
-#ifdef TRUST_FILES
+#ifdef FEATURE_TRUST
static struct file_list *current_trustfile = NULL;
-#endif /* def TRUST_FILES */
+#endif /* def FEATURE_TRUST */
+
+static int load_one_re_filterfile(struct client_state *csp, int fileid);
+
+static struct file_list *current_re_filterfile[MAX_AF_FILES] = {
+ NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL
+};
-#ifdef PCRS
-static struct file_list *current_re_filterfile = NULL;
-#endif /* def PCRS */
/*********************************************************************
void sweep(void)
{
struct file_list *fl, *nfl;
- struct client_state *csp, *ncsp;
+ struct client_state *csp, *last_active;
+ int i;
/* clear all of the file's active flags */
for ( fl = files->next; NULL != fl; fl = fl->next )
fl->active = 0;
}
- for (csp = clients; csp && (ncsp = csp->next) ; csp = csp->next)
+ last_active = clients;
+ csp = clients->next;
+
+ while (NULL != csp)
{
- if (ncsp->active)
+ if (csp->flags & CSP_FLAG_ACTIVE)
{
- /* mark this client's files as active */
+ /* Mark this client's files as active */
/*
* Always have a configuration file.
* (Also note the slightly non-standard extra
* indirection here.)
*/
- ncsp->config->config_file_list->active = 1;
+ csp->config->config_file_list->active = 1;
- if (ncsp->actions_list) /* actions files */
+ /*
+ * Actions files
+ */
+ for (i = 0; i < MAX_AF_FILES; i++)
{
- ncsp->actions_list->active = 1;
+ if (csp->actions_list[i])
+ {
+ csp->actions_list[i]->active = 1;
+ }
}
-#ifdef PCRS
- if (ncsp->rlist) /* perl re files */
+ /*
+ * Filter files
+ */
+ for (i = 0; i < MAX_AF_FILES; i++)
{
- ncsp->rlist->active = 1;
+ if (csp->rlist[i])
+ {
+ csp->rlist[i]->active = 1;
+ }
}
-#endif /* def PCRS */
-#ifdef TRUST_FILES
- if (ncsp->tlist) /* trust files */
+ /*
+ * Trust file
+ */
+#ifdef FEATURE_TRUST
+ if (csp->tlist)
{
- ncsp->tlist->active = 1;
+ csp->tlist->active = 1;
}
-#endif /* def TRUST_FILES */
+#endif /* def FEATURE_TRUST */
+
+ last_active = csp;
+ csp = csp->next;
}
- else
+ else
+ /*
+ * This client is not active. Free its resources.
+ */
{
- /* this client one is not active, release its resources */
- csp->next = ncsp->next;
+ last_active->next = csp->next;
- freez(ncsp->ip_addr_str);
-#ifdef TRUST_FILES
- freez(ncsp->referrer);
-#endif /* def TRUST_FILES */
- freez(ncsp->x_forwarded);
- freez(ncsp->iob->buf);
+ freez(csp->ip_addr_str);
+ freez(csp->my_ip_addr_str);
+ freez(csp->my_hostname);
+ freez(csp->x_forwarded);
+ freez(csp->iob->buf);
- free_http_request(ncsp->http);
+ free_http_request(csp->http);
- destroy_list(ncsp->headers);
- destroy_list(ncsp->cookie_list);
+ destroy_list(csp->headers);
+ destroy_list(csp->cookie_list);
- free_current_action(ncsp->action);
+ free_current_action(csp->action);
-#ifdef STATISTICS
+#ifdef FEATURE_STATISTICS
urls_read++;
- if (ncsp->rejected)
+ if (csp->flags & CSP_FLAG_REJECTED)
{
urls_rejected++;
}
-#endif /* def STATISTICS */
-
- freez(ncsp);
- }
- }
-
- for (fl = files; fl && (nfl = fl->next) ; fl = fl->next)
- {
- if ( ( 0 == nfl->active ) && ( NULL != nfl->unloader ) )
- {
- fl->next = nfl->next;
-
- (nfl->unloader)(nfl->f);
+#endif /* def FEATURE_STATISTICS */
-#ifndef SPLIT_PROXY_ARGS
- freez(nfl->proxy_args);
-#endif /* ndef SPLIT_PROXY_ARGS */
-
- freez(nfl->filename);
-
- freez(nfl);
+ freez(csp);
+
+ csp = last_active->next;
}
}
-}
-
-
-/*********************************************************************
- *
- * Function : create_url_spec
- *
- * Description : Creates a "url_spec" structure from a string.
- * When finished, free with unload_url().
- *
- * Parameters :
- * 1 : url = Target url_spec to be filled in. Must be
- * zeroed out before the call (e.g. using zalloc).
- * 2 : buf = Source pattern, null terminated. NOTE: The
- * contents of this buffer are destroyed by this
- * function. If this function succeeds, the
- * buffer is copied to url->spec. If this
- * function fails, the contents of the buffer
- * are lost forever.
- *
- * Returns : 0 => Ok, everything else is an error.
- *
- *********************************************************************/
-int create_url_spec(struct url_spec * url, char * buf)
-{
- char *p;
- struct url_spec tmp_url[1];
-
- /* paranoia - should never happen. */
- if ((url == NULL) || (buf == NULL))
- {
- return 1;
- }
-
- /* save a copy of the orignal specification */
- if ((url->spec = strdup(buf)) == NULL)
- {
- return 1;
- }
+ nfl = files;
+ fl = files->next;
- if ((p = strchr(buf, '/')))
+ while (fl != NULL)
{
- if (NULL == (url->path = strdup(p)))
+ if ( ( 0 == fl->active ) && ( NULL != fl->unloader ) )
{
- freez(url->spec);
- return 1;
- }
- url->pathlen = strlen(url->path);
- *p = '\0';
- }
- else
- {
- url->path = NULL;
- url->pathlen = 0;
- }
-#ifdef REGEX
- if (url->path)
- {
- int errcode;
- char rebuf[BUFSIZ];
+ nfl->next = fl->next;
- if (NULL == (url->preg = zalloc(sizeof(*url->preg))))
- {
- freez(url->spec);
- freez(url->path);
- return 1;
- }
+ (fl->unloader)(fl->f);
- sprintf(rebuf, "^(%s)", url->path);
+ freez(fl->filename);
+ freez(fl);
- errcode = regcomp(url->preg, rebuf,
- (REG_EXTENDED|REG_NOSUB|REG_ICASE));
- if (errcode)
+ fl = nfl->next;
+ }
+ else
{
- size_t errlen =
- regerror(errcode,
- url->preg, buf, sizeof(buf));
-
- buf[errlen] = '\0';
-
- log_error(LOG_LEVEL_ERROR, "error compiling %s: %s",
- url->spec, buf);
-
- freez(url->spec);
- freez(url->path);
- freez(url->preg);
-
- return 1;
+ nfl = fl;
+ fl = fl->next;
}
}
-#endif
- if ((p = strchr(buf, ':')) == NULL)
- {
- url->port = 0;
- }
- else
- {
- *p++ = '\0';
- url->port = atoi(p);
- }
-
- if ((url->domain = strdup(buf)) == NULL)
- {
- freez(url->spec);
- freez(url->path);
-#ifdef REGEX
- freez(url->preg);
-#endif /* def REGEX */
- return 1;
- }
-
- /* split domain into components */
-
- *tmp_url = dsplit(url->domain);
- url->dbuf = tmp_url->dbuf;
- url->dcnt = tmp_url->dcnt;
- url->dvec = tmp_url->dvec;
- url->unanchored = tmp_url->unanchored;
-
- return 0; /* OK */
-}
-
-
-/*********************************************************************
- *
- * Function : free_url
- *
- * Description : Called from the "unloaders". Freez the url
- * structure elements.
- *
- * Parameters :
- * 1 : url = pointer to a url_spec structure.
- *
- * Returns : N/A
- *
- *********************************************************************/
-void free_url(struct url_spec *url)
-{
- if (url == NULL) return;
-
- freez(url->spec);
- freez(url->domain);
- freez(url->dbuf);
- freez(url->dvec);
- freez(url->path);
-#ifdef REGEX
- if (url->preg)
- {
- regfree(url->preg);
- freez(url->preg);
- }
-#endif
}
*
* Description : Helper function to check if a file needs reloading.
* If "current" is still current, return it. Otherwise
- * allocates a new (zeroed) "struct file_list", fills
+ * allocates a new (zeroed) "struct file_list", fills
* in the disk file name and timestamp, and returns it.
*
* Parameters :
* 1 : current = The file_list currently being used - will
- * be checked to see if it is out of date.
+ * be checked to see if it is out of date.
* May be NULL (which is treated as out of
* date).
* 2 : filename = Name of file to check.
* This will be set to NULL, OR a struct
* file_list newly allocated on the
* heap, with the filename and lastmodified
- * fields filled, standard header giving file
- * name in proxy_args, and all others zeroed.
- * (proxy_args is only filled in if !defined
- * SPLIT_PROXY_ARGS and !suppress_blocklists).
+ * fields filled, and all others zeroed.
*
* Returns : If file unchanged: 0 (and sets newfl == NULL)
* If file changed: 1 and sets newfl != NULL
}
fs = (struct file_list *)zalloc(sizeof(struct file_list));
-
if (fs == NULL)
{
/* Out of memory error */
return 1;
}
+
fs->filename = strdup(filename);
fs->lastmodified = statbuf->st_mtime;
freez (fs);
return 1;
}
+ *newfl = fs;
+ return 1;
+}
+
-#ifndef SPLIT_PROXY_ARGS
- if (!suppress_blocklists)
+/*********************************************************************
+ *
+ * Function : simple_read_line
+ *
+ * Description : Read a single line from a file and return it.
+ * This is basically a version of fgets() that malloc()s
+ * its own line buffer. Note that the buffer will
+ * always be a multiple of BUFFER_SIZE bytes long.
+ * Therefore if you are going to keep the string for
+ * an extended period of time, you should probably
+ * strdup() it and free() the original, to save memory.
+ *
+ *
+ * Parameters :
+ * 1 : fp = File to read from
+ * 2 : dest = destination for newly malloc'd pointer to
+ * line data. Set to NULL when EOF is hit before any
+ * data is read; left untouched if memory allocation
+ * fails.
+ * 3 : newline = Standard for newlines in the file.
+ * Will be unchanged if its value on input is not
+ * NEWLINE_UNKNOWN.
+ * On output, may be changed from NEWLINE_UNKNOWN to
+ * actual convention in file.
+ *
+ * Returns : JB_ERR_OK on success
+ * JB_ERR_MEMORY on out-of-memory
+ * JB_ERR_FILE on EOF.
+ *
+ *********************************************************************/
+jb_err simple_read_line(FILE *fp, char **dest, int *newline)
+{
+ size_t len = 0;
+ size_t buflen = BUFFER_SIZE;
+ char * buf;
+ char * p;
+ int ch;
+ int realnewline = NEWLINE_UNKNOWN;
+
+ if (NULL == (buf = malloc(buflen)))
{
- char * p = html_encode(filename);
- if (p)
+ return JB_ERR_MEMORY;
+ }
+
+ p = buf;
+
+/*
+ * Character codes. If you have a weird compiler and the following are
+ * incorrect, you also need to fix NEWLINE() in loaders.h
+ */
+#define CHAR_CR '\r' /* ASCII 13 */
+#define CHAR_LF '\n' /* ASCII 10 */
+
+ for (;;)
+ {
+ ch = getc(fp);
+ if (ch == EOF)
{
- fs->proxy_args = strsav(fs->proxy_args, "<h2>The file `");
- fs->proxy_args = strsav(fs->proxy_args, p);
- fs->proxy_args = strsav(fs->proxy_args,
- "' contains the following patterns</h2>\n");
- freez(p);
+ if (len > 0)
+ {
+ *p = '\0';
+ *dest = buf;
+ return JB_ERR_OK;
+ }
+ else
+ {
+ free(buf);
+ *dest = NULL;
+ return JB_ERR_FILE;
+ }
+ }
+ else if (ch == CHAR_CR)
+ {
+ ch = getc(fp);
+ if (ch == CHAR_LF)
+ {
+ if (*newline == NEWLINE_UNKNOWN)
+ {
+ *newline = NEWLINE_DOS;
+ }
+ }
+ else
+ {
+ if (ch != EOF)
+ {
+ ungetc(ch, fp);
+ }
+ if (*newline == NEWLINE_UNKNOWN)
+ {
+ *newline = NEWLINE_MAC;
+ }
+ }
+ *p = '\0';
+ *dest = buf;
+ if (*newline == NEWLINE_UNKNOWN)
+ {
+ *newline = realnewline;
+ }
+ return JB_ERR_OK;
+ }
+ else if (ch == CHAR_LF)
+ {
+ *p = '\0';
+ *dest = buf;
+ if (*newline == NEWLINE_UNKNOWN)
+ {
+ *newline = NEWLINE_UNIX;
+ }
+ return JB_ERR_OK;
+ }
+ else if (ch == 0)
+ {
+ *p = '\0';
+ *dest = buf;
+ return JB_ERR_OK;
}
- fs->proxy_args = strsav(fs->proxy_args, "<pre>");
- }
-#endif /* ndef SPLIT_PROXY_ARGS */
- *newfl = fs;
- return 1;
+ *p++ = ch;
+
+ if (++len >= buflen)
+ {
+ buflen += BUFFER_SIZE;
+ if (NULL == (p = realloc(buf, buflen)))
+ {
+ free(buf);
+ return JB_ERR_MEMORY;
+ }
+ buf = p;
+ p = buf + len;
+ }
+ }
}
/*********************************************************************
*
- * Function : read_config_line
+ * Function : edit_read_line
*
* Description : Read a single non-empty line from a file and return
* it. Trims comments, leading and trailing whitespace
* and respects escaping of newline and comment char.
- * Also writes the file to fs->proxy_args.
+ * Provides the line in 2 alternative forms: raw and
+ * preprocessed.
+ * - raw is the raw data read from the file. If the
+ * line is not modified, then this should be written
+ * to the new file.
+ * - prefix is any comments and blank lines that were
+ * read from the file. If the line is modified, then
+ * this should be written out to the file followed
+ * by the modified data. (If this string is non-empty
+ * then it will have a newline at the end).
+ * - data is the actual data that will be parsed
+ * further by appropriate routines.
+ * On EOF, the 3 strings will all be set to NULL and
+ * JB_ERR_FILE will be returned.
*
* Parameters :
- * 1 : buf = Buffer to use.
- * 2 : buflen = Size of buffer in bytes.
- * 3 : fp = File to read from
- * 4 : fs = File will be written to fs->proxy_args. May
- * be NULL to disable this feature.
- *
- * Returns : NULL on EOF or error
- * Otherwise, returns buf.
+ * 1 : fp = File to read from
+ * 2 : raw_out = destination for newly malloc'd pointer to
+ * raw line data. May be NULL if you don't want it.
+ * 3 : prefix_out = destination for newly malloc'd pointer to
+ * comments. May be NULL if you don't want it.
+ * 4 : data_out = destination for newly malloc'd pointer to
+ * line data with comments and leading/trailing spaces
+ * removed, and line continuation performed. May be
+ * NULL if you don't want it.
+ * 5 : newline = Standard for newlines in the file.
+ * On input, set to value to use or NEWLINE_UNKNOWN.
+ * On output, may be changed from NEWLINE_UNKNOWN to
+ * actual convention in file. May be NULL if you
+ * don't want it.
+ * 6 : line_number = Line number in file. In "lines" as
+ * reported by a text editor, not lines containing data.
+ *
+ * Returns : JB_ERR_OK on success
+ * JB_ERR_MEMORY on out-of-memory
+ * JB_ERR_FILE on EOF.
*
*********************************************************************/
-char *read_config_line(char *buf, int buflen, FILE *fp, struct file_list *fs)
+jb_err edit_read_line(FILE *fp,
+ char **raw_out,
+ char **prefix_out,
+ char **data_out,
+ int *newline,
+ unsigned long *line_number)
{
- char *p, *q;
- char linebuf[BUFSIZ];
- int contflag = 0;
+ char *p; /* Temporary pointer */
+ char *linebuf; /* Line read from file */
+ char *linestart; /* Start of linebuf, usually first non-whitespace char */
+ int contflag = 0; /* Nonzero for line continuation - i.e. line ends '\' */
+ int is_empty = 1; /* Flag if not got any data yet */
+ char *raw = NULL; /* String to be stored in raw_out */
+ char *prefix = NULL; /* String to be stored in prefix_out */
+ char *data = NULL; /* String to be stored in data_out */
+ int scrapnewline; /* Used for (*newline) if newline==NULL */
+ jb_err rval = JB_ERR_OK;
+
+ assert(fp);
+ assert(raw_out || data_out);
+ assert(newline == NULL
+ || *newline == NEWLINE_UNKNOWN
+ || *newline == NEWLINE_UNIX
+ || *newline == NEWLINE_DOS
+ || *newline == NEWLINE_MAC);
+
+ if (newline == NULL)
+ {
+ scrapnewline = NEWLINE_UNKNOWN;
+ newline = &scrapnewline;
+ }
+
+ /* Set output parameters to NULL */
+ if (raw_out)
+ {
+ *raw_out = NULL;
+ }
+ if (prefix_out)
+ {
+ *prefix_out = NULL;
+ }
+ if (data_out)
+ {
+ *data_out = NULL;
+ }
+
+ /* Set string variables to new, empty strings. */
+
+ if (raw_out)
+ {
+ if ((raw = malloc(1)) == NULL)
+ {
+ return JB_ERR_MEMORY;
+ }
+ *raw = '\0';
+ }
+ if (prefix_out)
+ {
+ if ((prefix = malloc(1)) == NULL)
+ {
+ freez(raw);
+ return JB_ERR_MEMORY;
+ }
+ *prefix = '\0';
+ }
+ if (data_out)
+ {
+ if ((data = malloc(1)) == NULL)
+ {
+ freez(raw);
+ freez(prefix);
+ return JB_ERR_MEMORY;
+ }
+ *data = '\0';
+ }
- *buf = '\0';
+ /* Main loop. Loop while we need more data & it's not EOF. */
- while (fgets(linebuf, sizeof(linebuf), fp))
+ while ( (contflag || is_empty)
+ && (JB_ERR_OK == (rval = simple_read_line(fp, &linebuf, newline))))
{
-#ifndef SPLIT_PROXY_ARGS
- if (fs && !suppress_blocklists)
+ if (line_number)
+ {
+ (*line_number)++;
+ }
+ if (raw)
{
- char *html_line = html_encode(linebuf);
- if (html_line != NULL)
+ string_append(&raw,linebuf);
+ if (string_append(&raw,NEWLINE(*newline)))
{
- fs->proxy_args = strsav(fs->proxy_args, html_line);
- freez(html_line);
+ freez(prefix);
+ freez(data);
+ free(linebuf);
+ return JB_ERR_MEMORY;
}
- fs->proxy_args = strsav(fs->proxy_args, "<br>");
}
-#endif /* ndef SPLIT_PROXY_ARGS */
- /* Trim off newline */
- if ((p = strpbrk(linebuf, "\r\n")) != NULL)
+ /* Line continuation? Trim escape and set flag. */
+ p = linebuf + strlen(linebuf) - 1;
+ contflag = ((*linebuf != '\0') && (*p == '\\'));
+ if (contflag)
{
*p = '\0';
}
- /* Line continuation? Trim escape and set flag. */
- if ((p != linebuf) && (*--p == '\\'))
+ /* Trim leading spaces if we're at the start of the line */
+ linestart = linebuf;
+ if (*data == '\0')
{
- contflag = 1;
- *p = '\0';
+ /* Trim leading spaces */
+ while (*linestart && isspace((int)(unsigned char)*linestart))
+ {
+ linestart++;
+ }
}
- /* If there's a comment char.. */
- if ((p = strpbrk(linebuf, "#")) != NULL)
+ /* Handle comment characters. */
+ p = linestart;
+ while ((p = strchr(p, '#')) != NULL)
{
- /* ..and it's escaped, left-shift the line over the escape. */
+ /* Found a comment char.. */
if ((p != linebuf) && (*(p-1) == '\\'))
{
- q = p-1;
- while ((*q++ = *p++) != '\0') /* nop */;
+ /* ..and it's escaped, left-shift the line over the escape. */
+ char *q = p - 1;
+ while ((*q = *(q + 1)) != '\0')
+ {
+ q++;
+ }
+ /* Now scan from just after the "#". */
}
- /* Else, chop off the rest of the line */
else
{
+ /* Real comment. Save it... */
+ if (p == linestart)
+ {
+ /* Special case: Line only contains a comment, so all the
+ * previous whitespace is considered part of the comment.
+ * Undo the whitespace skipping, if any.
+ */
+ linestart = linebuf;
+ p = linestart;
+ }
+ if (prefix)
+ {
+ string_append(&prefix,p);
+ if (string_append(&prefix, NEWLINE(*newline)))
+ {
+ freez(raw);
+ freez(data);
+ free(linebuf);
+ return JB_ERR_MEMORY;
+ }
+ }
+
+ /* ... and chop off the rest of the line */
*p = '\0';
}
- }
+ } /* END while (there's a # character) */
/* Write to the buffer */
- if (*linebuf)
+ if (*linestart)
{
- strncat(buf, linebuf, buflen - strlen(buf));
+ is_empty = 0;
+ if (data)
+ {
+ if (string_append(&data, linestart))
+ {
+ freez(raw);
+ freez(prefix);
+ free(linebuf);
+ return JB_ERR_MEMORY;
+ }
+ }
}
- /* Continue? */
- if (contflag)
- {
- contflag = 0;
- continue;
- }
+ free(linebuf);
+ } /* END while(we need more data) */
- /* Remove leading and trailing whitespace */
- chomp(buf);
+ /* Handle simple_read_line() errors - ignore EOF */
+ if ((rval != JB_ERR_OK) && (rval != JB_ERR_FILE))
+ {
+ freez(raw);
+ freez(prefix);
+ freez(data);
+ return rval;
+ }
- if (*buf)
+ if (raw ? (*raw == '\0') : is_empty)
+ {
+ /* EOF and no data there. (Definition of "data" depends on whether
+ * the caller cares about "raw" or just "data").
+ */
+
+ freez(raw);
+ freez(prefix);
+ freez(data);
+
+ return JB_ERR_FILE;
+ }
+ else
+ {
+ /* Got at least some data */
+
+ /* Remove trailing whitespace */
+ chomp(data);
+
+ if (raw_out)
+ {
+ *raw_out = raw;
+ }
+ else
{
- return buf;
+ freez(raw);
}
+ if (prefix_out)
+ {
+ *prefix_out = prefix;
+ }
+ else
+ {
+ freez(prefix);
+ }
+ if (data_out)
+ {
+ *data_out = data;
+ }
+ else
+ {
+ freez(data);
+ }
+ return JB_ERR_OK;
}
+}
- /* EOF */
- return NULL;
+/*********************************************************************
+ *
+ * Function : read_config_line
+ *
+ * Description : Read a single non-empty line from a file and return
+ * it. Trims comments, leading and trailing whitespace
+ * and respects escaping of newline and comment char.
+ *
+ * Parameters :
+ * 1 : buf = Buffer to use.
+ * 2 : buflen = Size of buffer in bytes.
+ * 3 : fp = File to read from
+ * 4 : linenum = linenumber in file
+ *
+ * Returns : NULL on EOF or error
+ * Otherwise, returns buf.
+ *
+ *********************************************************************/
+char *read_config_line(char *buf, size_t buflen, FILE *fp, unsigned long *linenum)
+{
+ jb_err err;
+ char *buf2 = NULL;
+ err = edit_read_line(fp, NULL, NULL, &buf2, NULL, linenum);
+ if (err)
+ {
+ if (err == JB_ERR_MEMORY)
+ {
+ log_error(LOG_LEVEL_FATAL, "Out of memory loading a config file");
+ }
+ return NULL;
+ }
+ else
+ {
+ assert(buf2);
+ assert(strlen(buf2) + 1U < buflen);
+ strncpy(buf, buf2, buflen - 1);
+ free(buf2);
+ buf[buflen - 1] = '\0';
+ return buf;
+ }
}
-#ifdef TRUST_FILES
+#ifdef FEATURE_TRUST
/*********************************************************************
*
* Function : unload_trustfile
*********************************************************************/
static void unload_trustfile(void *f)
{
- struct block_spec *b = (struct block_spec *)f;
- if (b == NULL) return;
+ struct block_spec *cur = (struct block_spec *)f;
+ struct block_spec *next;
- unload_trustfile(b->next);
+ while (cur != NULL)
+ {
+ next = cur->next;
- free_url(b->url);
+ free_url_spec(cur->url);
+ free(cur);
- freez(b);
+ cur = next;
+ }
}
+#ifdef FEATURE_GRACEFUL_TERMINATION
+/*********************************************************************
+ *
+ * Function : unload_current_trust_file
+ *
+ * Description : Unloads current trust file - reset to state at
+ * beginning of program.
+ *
+ * Parameters : None
+ *
+ * Returns : N/A
+ *
+ *********************************************************************/
+void unload_current_trust_file(void)
+{
+ if (current_trustfile)
+ {
+ current_trustfile->unloader = unload_trustfile;
+ current_trustfile = NULL;
+ }
+}
+#endif /* FEATURE_GRACEFUL_TERMINATION */
+
+
/*********************************************************************
*
* Function : load_trustfile
struct block_spec *b, *bl;
struct url_spec **tl;
- char buf[BUFSIZ], *p, *q;
+ char buf[BUFFER_SIZE], *p, *q;
int reject, trusted;
struct file_list *fs;
+ unsigned long linenum = 0;
+ int trusted_referrers = 0;
if (!check_file_changed(current_trustfile, csp->config->trustfile, &fs))
{
tl = csp->config->trust_list;
- while (read_config_line(buf, sizeof(buf), fp, fs) != NULL)
+ while (read_config_line(buf, sizeof(buf), fp, &linenum) != NULL)
{
trusted = 0;
reject = 1;
reject = 0;
p = buf;
q = p+1;
- while ((*p++ = *q++))
+ while ((*p++ = *q++) != '\0')
{
/* nop */
}
*/
if (trusted)
{
- *tl++ = b->url;
+ if(++trusted_referrers < MAX_TRUSTED_REFERRERS)
+ {
+ *tl++ = b->url;
+ }
}
}
+ if(trusted_referrers >= MAX_TRUSTED_REFERRERS)
+ {
+ /*
+ * FIXME: ... after Privoxy 3.0.4 is out.
+ */
+ log_error(LOG_LEVEL_ERROR, "Too many trusted referrers. Current limit is %d, you are using %d.\n"
+ " Additional trusted referrers are treated like ordinary trusted URLs.\n"
+ " (You can increase this limit by changing MAX_TRUSTED_REFERRERS in project.h and recompiling).",
+ MAX_TRUSTED_REFERRERS, trusted_referrers);
+ }
+
*tl = NULL;
fclose(fp);
-#ifndef SPLIT_PROXY_ARGS
- if (!suppress_blocklists)
- {
- fs->proxy_args = strsav(fs->proxy_args, "</pre>");
- }
-#endif /* ndef SPLIT_PROXY_ARGS */
-
/* the old one is now obsolete */
if (current_trustfile)
{
return(-1);
}
-#endif /* def TRUST_FILES */
+#endif /* def FEATURE_TRUST */
-#ifdef PCRS
/*********************************************************************
*
* Function : unload_re_filterfile
*
- * Description : Unload the re_filter list.
+ * Description : Unload the re_filter list by freeing all chained
+ * re_filterfile specs and their data.
*
* Parameters :
* 1 : f = the data structure associated with the filterfile.
*********************************************************************/
static void unload_re_filterfile(void *f)
{
- pcrs_job *joblist;
- struct re_filterfile_spec *b = (struct re_filterfile_spec *)f;
+ struct re_filterfile_spec *a, *b = (struct re_filterfile_spec *)f; /* b: spec currently being freed; a: its successor */
+
+ while (b != NULL) /* walk the whole chain; a NULL argument frees nothing */
+ {
+ a = b->next; /* remember the successor first: b itself is freed below */
- if (b == NULL) return;
+ destroy_list(b->patterns);
+ pcrs_free_joblist(b->joblist);
+ freez(b->name);
+ freez(b->description);
+ freez(b);
- destroy_list(b->patterns);
+ b = a; /* continue with the saved successor */
+ }
+
+ return;
+}
- joblist = b->joblist;
- while ( NULL != (joblist = pcrs_free_job(joblist)) ) {}
- freez(b);
+#ifdef FEATURE_GRACEFUL_TERMINATION
+/*********************************************************************
+ *
+ * Function : unload_current_re_filterfile
+ *
+ * Description : Unloads current re_filter file - reset to state at
+ * beginning of program.
+ *
+ * Parameters : None
+ *
+ * Returns : N/A
+ *
+ *********************************************************************/
+void unload_current_re_filterfile(void)
+{
+   int slot;
+
+   /*
+    * Visit every filter file slot. For each slot that currently
+    * holds a loaded file, install unload_re_filterfile as its
+    * unloader and clear the global reference to it.
+    */
+   for (slot = 0; slot < MAX_AF_FILES; ++slot)
+   {
+      if (current_re_filterfile[slot] == NULL)
+      {
+         /* nothing loaded in this slot */
+         continue;
+      }
+
+      current_re_filterfile[slot]->unloader = unload_re_filterfile;
+      current_re_filterfile[slot] = NULL;
+   }
}
+#endif
+
/*********************************************************************
*
* Function : load_re_filterfile
*
- * Description : Load the re_filterfile. Each non-comment, non-empty
- * line is instantly added to the joblist, which is
- * a chained list of pcrs_job structs.
+ * Description : Load the re_filterfile.
+ * Generate a chained list of re_filterfile_spec's from
+ * the "FILTER: " blocks, compiling all their substitutions
+ * into chained lists of pcrs_job structs.
*
* Parameters :
* 1 : csp = Current client state (buffers, headers, etc...)
*
*********************************************************************/
int load_re_filterfile(struct client_state *csp)
+{
+   int fileid;
+   int status;
+
+   for (fileid = 0; fileid < MAX_AF_FILES; ++fileid)
+   {
+      if (csp->config->re_filterfile[fileid] == NULL)
+      {
+         /*
+          * No file configured for this slot. If one is still
+          * loaded from earlier, schedule it for unloading and
+          * drop the global reference.
+          */
+         if (current_re_filterfile[fileid] != NULL)
+         {
+            current_re_filterfile[fileid]->unloader = unload_re_filterfile;
+            current_re_filterfile[fileid] = NULL;
+         }
+         continue;
+      }
+
+      /* A file is configured: (re)load it, stopping at the first failure. */
+      status = load_one_re_filterfile(csp, fileid);
+      if (status != 0)
+      {
+         return status;
+      }
+   }
+
+   return 0;
+}
+
+/*********************************************************************
+ *
+ * Function : load_one_re_filterfile
+ *
+ * Description : Load a re_filterfile.
+ * Generate a chained list of re_filterfile_spec's from
+ * the "FILTER: " blocks, compiling all their substitutions
+ * into chained lists of pcrs_job structs.
+ *
+ * Parameters :
+ * 1 : csp = Current client state (buffers, headers, etc...)
+ * 2 : fileid = Index of the filter file to load; selects the
+ * entry of csp->config->re_filterfile[],
+ * current_re_filterfile[] and csp->rlist[] to use.
+ *
+ * Returns : 0 => Ok, everything else is an error.
+ *
+ *********************************************************************/
+int load_one_re_filterfile(struct client_state *csp, int fileid)
{
FILE *fp;
- struct re_filterfile_spec *bl;
+ struct re_filterfile_spec *new_bl, *bl = NULL;
struct file_list *fs;
- char buf[BUFSIZ];
+ char buf[BUFFER_SIZE];
int error;
- pcrs_job *dummy;
+ unsigned long linenum = 0;
+ pcrs_job *dummy, *lastjob = NULL;
- if (!check_file_changed(current_re_filterfile, csp->config->re_filterfile, &fs))
+ /*
+ * No need to reload if unchanged
+ */
+ if (!check_file_changed(current_re_filterfile[fileid], csp->config->re_filterfile[fileid], &fs))
{
- /* No need to load */
if (csp)
{
- csp->rlist = current_re_filterfile;
+ csp->rlist[fileid] = current_re_filterfile[fileid];
}
return(0);
}
goto load_re_filterfile_error;
}
- fs->f = bl = (struct re_filterfile_spec *)zalloc(sizeof(*bl));
- if (bl == NULL)
+ /*
+ * Open the file or fail
+ */
+ if ((fp = fopen(csp->config->re_filterfile[fileid], "r")) == NULL)
{
goto load_re_filterfile_error;
}
- /* Open the file or fail */
- if ((fp = fopen(csp->config->re_filterfile, "r")) == NULL)
+ /*
+ * Read line by line
+ */
+ while (read_config_line(buf, sizeof(buf), fp, &linenum) != NULL)
{
- goto load_re_filterfile_error;
- }
+ /*
+ * If this is the head of a new filter block, make it a
+ * re_filterfile spec of its own and chain it to the list:
+ */
+ if (strncmp(buf, "FILTER:", 7) == 0)
+ {
+ new_bl = (struct re_filterfile_spec *)zalloc(sizeof(*bl));
+ if (new_bl == NULL)
+ {
+ goto load_re_filterfile_error;
+ }
- /* Read line by line */
- while (read_config_line(buf, sizeof(buf), fp, fs) != NULL)
- {
- enlist( bl->patterns, buf );
+ new_bl->name = chomp(buf + 7);
+
+ if (NULL != (new_bl->description = strpbrk(new_bl->name, " \t")))
+ {
+ *new_bl->description++ = '\0';
+ new_bl->description = strdup(chomp(new_bl->description));
+ }
+ else
+ {
+ new_bl->description = strdup("No description available for this filter");
+ }
+
+ new_bl->name = strdup(chomp(new_bl->name));
+
+ /*
+ * If this is the first filter block, chain it
+ * to the file_list rather than its (nonexistent)
+ * predecessor
+ */
+ if (fs->f == NULL)
+ {
+ fs->f = new_bl;
+ }
+ else
+ {
+ bl->next = new_bl;
+ }
+ bl = new_bl;
+
+ log_error(LOG_LEVEL_RE_FILTER, "Reading in filter \"%s\" (\"%s\")", bl->name, bl->description);
- /* We have a meaningful line -> make it a job */
- if ((dummy = pcrs_make_job(buf, &error)) == NULL)
- {
- log_error(LOG_LEVEL_RE_FILTER,
- "Adding re_filter job %s failed with error %d.", buf, error);
continue;
}
+
+ /*
+ * Else, save the expression, make it a pcrs_job
+ * and chain it into the current filter's joblist
+ */
+ if (bl != NULL)
+ {
+ enlist(bl->patterns, buf);
+
+ if ((dummy = pcrs_compile_command(buf, &error)) == NULL)
+ {
+ log_error(LOG_LEVEL_ERROR,
+ "Adding re_filter job %s to filter %s failed with error %d.", buf, bl->name, error);
+ continue;
+ }
+ else
+ {
+ if (bl->joblist == NULL)
+ {
+ bl->joblist = dummy;
+ }
+ else
+ {
+ lastjob->next = dummy;
+ }
+ lastjob = dummy;
+ log_error(LOG_LEVEL_RE_FILTER, "Adding re_filter job %s to filter %s succeeded.", buf, bl->name);
+ }
+ }
else
{
- dummy->next = bl->joblist;
- bl->joblist = dummy;
- log_error(LOG_LEVEL_RE_FILTER, "Adding re_filter job %s succeeded.", buf);
+ /*
+ * re_filterfile is an array indexed by fileid, so pass the
+ * single filename for %s; linenum is unsigned long, so
+ * convert it to the int that %d is given elsewhere.
+ */
+ log_error(LOG_LEVEL_ERROR, "Ignoring job %s outside filter block in %s, line %d",
+ buf, csp->config->re_filterfile[fileid], (int)linenum);
}
}
fclose(fp);
-#ifndef SPLIT_PROXY_ARGS
- if (!suppress_blocklists)
- {
- fs->proxy_args = strsav(fs->proxy_args, "</pre>");
- }
-#endif /* ndef SPLIT_PROXY_ARGS */
-
- /* the old one is now obsolete */
- if ( NULL != current_re_filterfile )
+ /*
+ * Schedule the now-obsolete old data for unloading
+ */
+ if ( NULL != current_re_filterfile[fileid] )
{
- current_re_filterfile->unloader = unload_re_filterfile;
+ current_re_filterfile[fileid]->unloader = unload_re_filterfile;
}
+ /*
+ * Chain this file into the global list of loaded files
+ */
fs->next = files->next;
files->next = fs;
- current_re_filterfile = fs;
+ current_re_filterfile[fileid] = fs;
if (csp)
{
- csp->rlist = fs;
+ csp->rlist[fileid] = fs;
}
return( 0 );
load_re_filterfile_error:
- log_error(LOG_LEVEL_FATAL, "can't load re_filterfile '%s': %E",
- csp->config->re_filterfile);
+ log_error(LOG_LEVEL_FATAL, "can't load re_filterfile '%s': %E",
+ csp->config->re_filterfile[fileid]);
return(-1);
}
-#endif /* def PCRS */
/*********************************************************************
* Returns : N/A
*
*********************************************************************/
-void add_loader(int (*loader)(struct client_state *),
+void add_loader(int (*loader)(struct client_state *),
struct configuration_spec * config)
{
int i;