X-Git-Url: http://www.privoxy.org/gitweb/?p=privoxy.git;a=blobdiff_plain;f=loaders.c;h=92e0353f86c572e9c72d52e92ac3495f887cb1ec;hp=6ca212cc7712941f30052444aeb8c4cbbe6a36d6;hb=97dcdede1c96c857ea36b5f5a9873c302d021a7d;hpb=605576ce35e16c57567f79dd9086bb9ae001753b diff --git a/loaders.c b/loaders.c index 6ca212cc..92e0353f 100644 --- a/loaders.c +++ b/loaders.c @@ -1,4 +1,4 @@ -const char loaders_rcs[] = "$Id: loaders.c,v 1.32 2001/11/07 00:02:13 steudten Exp $"; +const char loaders_rcs[] = "$Id: loaders.c,v 1.103 2016/05/08 10:45:32 fabiankeil Exp $"; /********************************************************************* * * File : $Source: /cvsroot/ijbswa/current/loaders.c,v $ @@ -8,8 +8,8 @@ const char loaders_rcs[] = "$Id: loaders.c,v 1.32 2001/11/07 00:02:13 steudten E * the list of active loaders, and to automatically * unload files that are no longer in use. * - * Copyright : Written by and Copyright (C) 2001 the SourceForge - * IJBSWA team. http://ijbswa.sourceforge.net + * Copyright : Written by and Copyright (C) 2001-2014 the + * Privoxy team. http://www.privoxy.org/ * * Based on the Internet Junkbuster originally written * by and Copyright (C) 1997 Anonymous Coders and @@ -33,167 +33,8 @@ const char loaders_rcs[] = "$Id: loaders.c,v 1.32 2001/11/07 00:02:13 steudten E * or write to the Free Software Foundation, Inc., 59 * Temple Place - Suite 330, Boston, MA 02111-1307, USA. * - * Revisions : - * $Log: loaders.c,v $ - * Revision 1.32 2001/11/07 00:02:13 steudten - * Add line number in error output for lineparsing for - * actionsfile and configfile. - * Special handling for CLF added. - * - * Revision 1.31 2001/10/26 17:39:01 oes - * Removed csp->referrer - * Moved ijb_isspace and ijb_tolower to project.h - * - * Revision 1.30 2001/10/25 03:40:48 david__schmidt - * Change in porting tactics: OS/2's EMX porting layer doesn't allow multiple - * threads to call select() simultaneously. So, it's time to do a real, live, - * native OS/2 port. See defines for __EMX__ (the porting layer) vs. __OS2__ - * (native). Both versions will work, but using __OS2__ offers multi-threading. - * - * Revision 1.29 2001/10/23 21:38:53 jongfoster - * Adding error-checking to create_url_spec() - * - * Revision 1.28 2001/10/07 15:40:39 oes - * Replaced 6 boolean members of csp with one bitmap (csp->flags) - * - * Revision 1.27 2001/09/22 16:36:59 jongfoster - * Removing unused parameter fs from read_config_line() - * - * Revision 1.26 2001/09/22 14:05:22 jongfoster - * Bugfix: Multiple escaped "#" characters in a configuration - * file are now permitted. - * Also removing 3 unused headers. - * - * Revision 1.25 2001/09/13 22:44:03 jongfoster - * Adding {} to an if statement - * - * Revision 1.24 2001/07/30 22:08:36 jongfoster - * Tidying up #defines: - * - All feature #defines are now of the form FEATURE_xxx - * - Permanently turned off WIN_GUI_EDIT - * - Permanently turned on WEBDAV and SPLIT_PROXY_ARGS - * - * Revision 1.23 2001/07/20 15:51:54 oes - * Fixed indentation of prepocessor commands - * - * Revision 1.22 2001/07/20 15:16:17 haroon - * - per Guy's suggestion, added a while loop in sweep() to catch not just - * the last inactive CSP but all other consecutive inactive CSPs after that - * as well - * - * Revision 1.21 2001/07/18 17:26:24 oes - * Changed to conform to new pcrs interface - * - * Revision 1.20 2001/07/17 13:07:01 oes - * Fixed segv when last line in config files - * lacked a terminating (\r)\n - * - * Revision 1.19 2001/07/13 14:01:54 oes - * Removed all #ifdef PCRS - * - * Revision 1.18 2001/06/29 21:45:41 oes - * Indentation, CRLF->LF, Tab-> Space - * - * Revision 1.17 2001/06/29 13:31:51 oes - * Various adaptions - * - * Revision 1.16 2001/06/09 10:55:28 jongfoster - * Changing BUFSIZ ==> BUFFER_SIZE - * - * Revision 1.15 2001/06/07 23:14:14 jongfoster - * Removing ACL and forward file loaders - these - * files have been merged into the config file. - * Cosmetic: Moving unloader funcs next to their - * respective loader funcs - * - * Revision 1.14 2001/06/01 03:27:04 oes - * Fixed line continuation problem - * - * Revision 1.13 2001/05/31 21:28:49 jongfoster - * Removed all permissionsfile code - it's now called the actions - * file, and (almost) all the code is in actions.c - * - * Revision 1.12 2001/05/31 17:32:31 oes - * - * - Enhanced domain part globbing with infix and prefix asterisk - * matching and optional unanchored operation - * - * Revision 1.11 2001/05/29 23:25:24 oes - * - * - load_config_line() and load_permissions_file() now use chomp() - * - * Revision 1.10 2001/05/29 09:50:24 jongfoster - * Unified blocklist/imagelist/permissionslist. - * File format is still under discussion, but the internal changes - * are (mostly) done. - * - * Also modified interceptor behaviour: - * - We now intercept all URLs beginning with one of the following - * prefixes (and *only* these prefixes): - * * http://i.j.b/ - * * http://ijbswa.sf.net/config/ - * * http://ijbswa.sourceforge.net/config/ - * - New interceptors "home page" - go to http://i.j.b/ to see it. - * - Internal changes so that intercepted and fast redirect pages - * are not replaced with an image. - * - Interceptors now have the option to send a binary page direct - * to the client. (i.e. ijb-send-banner uses this) - * - Implemented show-url-info interceptor. (Which is why I needed - * the above interceptors changes - a typical URL is - * "http://i.j.b/show-url-info?url=www.somesite.com/banner.gif". - * The previous mechanism would not have intercepted that, and - * if it had been intercepted then it then it would have replaced - * it with an image.) - * - * Revision 1.9 2001/05/26 17:12:07 jongfoster - * Fatal errors loading configuration files now give better error messages. - * - * Revision 1.8 2001/05/26 00:55:20 jongfoster - * Removing duplicated code. load_forwardfile() now uses create_url_spec() - * - * Revision 1.7 2001/05/26 00:28:36 jongfoster - * Automatic reloading of config file. - * Removed obsolete SIGHUP support (Unix) and Reload menu option (Win32). - * Most of the global variables have been moved to a new - * struct configuration_spec, accessed through csp->config->globalname - * Most of the globals remaining are used by the Win32 GUI. - * - * Revision 1.6 2001/05/23 12:27:33 oes - * - * Fixed ugly indentation of my last changes - * - * Revision 1.5 2001/05/23 10:39:05 oes - * - Added support for escaping the comment character - * in config files by a backslash - * - Added support for line continuation in config - * files - * - Fixed a buffer overflow bug with long config lines - * - * Revision 1.4 2001/05/22 18:56:28 oes - * CRLF -> LF - * - * Revision 1.3 2001/05/20 01:21:20 jongfoster - * Version 2.9.4 checkin. - * - Merged popupfile and cookiefile, and added control over PCRS - * filtering, in new "permissionsfile". - * - Implemented LOG_LEVEL_FATAL, so that if there is a configuration - * file error you now get a message box (in the Win32 GUI) rather - * than the program exiting with no explanation. - * - Made killpopup use the PCRS MIME-type checking and HTTP-header - * skipping. - * - Removed tabs from "config" - * - Moved duplicated url parsing code in "loaders.c" to a new funcition. - * - Bumped up version number. - * - * Revision 1.2 2001/05/17 23:01:01 oes - * - Cleaned CRLF's from the sources and related files - * - * Revision 1.1.1.1 2001/05/15 13:58:59 oes - * Initial import of version 2.9.3 source tree - * - * *********************************************************************/ - + #include "config.h" @@ -219,6 +60,8 @@ const char loaders_rcs[] = "$Id: loaders.c,v 1.32 2001/11/07 00:02:13 steudten E #include "miscutil.h" #include "errlog.h" #include "actions.h" +#include "urlmatch.h" +#include "encode.h" const char loaders_h_rcs[] = LOADERS_H_VERSION; @@ -231,8 +74,12 @@ const char loaders_h_rcs[] = LOADERS_H_VERSION; static struct file_list *current_trustfile = NULL; #endif /* def FEATURE_TRUST */ -static struct file_list *current_re_filterfile = NULL; +static int load_one_re_filterfile(struct client_state *csp, int fileid); +static struct file_list *current_re_filterfile[MAX_AF_FILES] = { + NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL +}; /********************************************************************* @@ -256,273 +103,142 @@ static struct file_list *current_re_filterfile = NULL; * * Parameters : None * - * Returns : N/A + * Returns : The number of threads that are still active. * *********************************************************************/ -void sweep(void) +unsigned int sweep(void) { struct file_list *fl, *nfl; - struct client_state *csp, *ncsp; + struct client_state *csp; + struct client_states *last_active, *client_list; + int i; + unsigned int active_threads = 0; /* clear all of the file's active flags */ - for ( fl = files->next; NULL != fl; fl = fl->next ) + for (fl = files->next; NULL != fl; fl = fl->next) { fl->active = 0; } - for (csp = clients; csp && (ncsp = csp->next) ; csp = csp->next) + last_active = clients; + client_list = clients->next; + + while (NULL != client_list) { - if (ncsp->flags & CSP_FLAG_ACTIVE) + csp = &client_list->csp; + if (csp->flags & CSP_FLAG_ACTIVE) { - /* mark this client's files as active */ + /* Mark this client's files as active */ /* * Always have a configuration file. * (Also note the slightly non-standard extra * indirection here.) */ - ncsp->config->config_file_list->active = 1; + csp->config->config_file_list->active = 1; - if (ncsp->actions_list) /* actions files */ + /* + * Actions files + */ + for (i = 0; i < MAX_AF_FILES; i++) { - ncsp->actions_list->active = 1; + if (csp->actions_list[i]) + { + csp->actions_list[i]->active = 1; + } } - if (ncsp->rlist) /* pcrsjob files */ + /* + * Filter files + */ + for (i = 0; i < MAX_AF_FILES; i++) { - ncsp->rlist->active = 1; + if (csp->rlist[i]) + { + csp->rlist[i]->active = 1; + } } + /* + * Trust file + */ #ifdef FEATURE_TRUST - if (ncsp->tlist) /* trust files */ + if (csp->tlist) { - ncsp->tlist->active = 1; + csp->tlist->active = 1; } #endif /* def FEATURE_TRUST */ + active_threads++; + + last_active = client_list; + client_list = client_list->next; } else /* - * this client is not active, release its resources - * and the ones of all inactive clients that might - * follow it + * This client is not active. Free its resources. */ { - while (!(ncsp->flags & CSP_FLAG_ACTIVE)) - { - csp->next = ncsp->next; + last_active->next = client_list->next; - freez(ncsp->ip_addr_str); - freez(ncsp->my_ip_addr_str); - freez(ncsp->my_hostname); - freez(ncsp->x_forwarded); - freez(ncsp->iob->buf); + freez(csp->ip_addr_str); +#ifdef FEATURE_CLIENT_TAGS + freez(csp->client_address); +#endif + freez(csp->client_iob->buf); + freez(csp->iob->buf); + freez(csp->error_message); - free_http_request(ncsp->http); + if (csp->action->flags & ACTION_FORWARD_OVERRIDE && + NULL != csp->fwd) + { + unload_forward_spec(csp->fwd); + } + free_http_request(csp->http); - destroy_list(ncsp->headers); - destroy_list(ncsp->cookie_list); + destroy_list(csp->headers); + destroy_list(csp->tags); - free_current_action(ncsp->action); + free_current_action(csp->action); #ifdef FEATURE_STATISTICS - urls_read++; - if (ncsp->flags & CSP_FLAG_REJECTED) - { - urls_rejected++; - } -#endif /* def FEATURE_STATISTICS */ - - freez(ncsp); - - /* are there any more in sequence after it? */ - if( !(ncsp = csp->next) ) - break; + urls_read++; + if (csp->flags & CSP_FLAG_REJECTED) + { + urls_rejected++; } - } - } - - for (fl = files; fl && (nfl = fl->next) ; fl = fl->next) - { - if ( ( 0 == nfl->active ) && ( NULL != nfl->unloader ) ) - { - fl->next = nfl->next; - - (nfl->unloader)(nfl->f); +#endif /* def FEATURE_STATISTICS */ - freez(nfl->filename); + freez(client_list); - freez(nfl); + client_list = last_active->next; } } -} - - -/********************************************************************* - * - * Function : create_url_spec - * - * Description : Creates a "url_spec" structure from a string. - * When finished, free with unload_url(). - * - * Parameters : - * 1 : url = Target url_spec to be filled in. Must be - * zeroed out before the call (e.g. using zalloc). - * 2 : buf = Source pattern, null terminated. NOTE: The - * contents of this buffer are destroyed by this - * function. If this function succeeds, the - * buffer is copied to url->spec. If this - * function fails, the contents of the buffer - * are lost forever. - * - * Returns : JB_ERR_OK - Success - * JB_ERR_MEMORY - Out of memory - * JB_ERR_PARSE - Cannot parse regex (Detailed message - * written to system log) - * - *********************************************************************/ -jb_err create_url_spec(struct url_spec * url, char * buf) -{ - char *p; - struct url_spec tmp_url[1]; - - assert(url); - assert(buf); + nfl = files; + fl = files->next; - /* save a copy of the orignal specification */ - if ((url->spec = strdup(buf)) == NULL) + while (fl != NULL) { - return JB_ERR_MEMORY; - } - - if ((p = strchr(buf, '/'))) - { - if (NULL == (url->path = strdup(p))) + if ((0 == fl->active) && (NULL != fl->unloader)) { - freez(url->spec); - return JB_ERR_MEMORY; - } - url->pathlen = strlen(url->path); - *p = '\0'; - } - else - { - url->path = NULL; - url->pathlen = 0; - } -#ifdef REGEX - if (url->path) - { - int errcode; - char rebuf[BUFFER_SIZE]; + nfl->next = fl->next; - if (NULL == (url->preg = zalloc(sizeof(*url->preg)))) - { - freez(url->spec); - freez(url->path); - return JB_ERR_MEMORY; - } + (fl->unloader)(fl->f); - sprintf(rebuf, "^(%s)", url->path); + freez(fl->filename); + freez(fl); - errcode = regcomp(url->preg, rebuf, - (REG_EXTENDED|REG_NOSUB|REG_ICASE)); - if (errcode) + fl = nfl->next; + } + else { - size_t errlen = regerror(errcode, - url->preg, rebuf, sizeof(rebuf)); - - if (errlen > (sizeof(rebuf) - (size_t)1)) - { - errlen = sizeof(rebuf) - (size_t)1; - } - rebuf[errlen] = '\0'; - - log_error(LOG_LEVEL_ERROR, "error compiling %s: %s", - url->spec, rebuf); - - freez(url->spec); - freez(url->path); - freez(url->preg); - - return JB_ERR_PARSE; + nfl = fl; + fl = fl->next; } } -#endif - if ((p = strchr(buf, ':')) == NULL) - { - url->port = 0; - } - else - { - *p++ = '\0'; - url->port = atoi(p); - } - - if ((url->domain = strdup(buf)) == NULL) - { - freez(url->spec); - freez(url->path); -#ifdef REGEX - freez(url->preg); -#endif /* def REGEX */ - return JB_ERR_MEMORY; - } - /* split domain into components */ - - *tmp_url = dsplit(url->domain); - if (tmp_url->dbuf == NULL) - { - freez(url->spec); - freez(url->path); - freez(url->domain); -#ifdef REGEX - freez(url->preg); -#endif /* def REGEX */ - return JB_ERR_MEMORY; - } - - url->dbuf = tmp_url->dbuf; - url->dcnt = tmp_url->dcnt; - url->dvec = tmp_url->dvec; - url->unanchored = tmp_url->unanchored; - - return JB_ERR_OK; - -} - - -/********************************************************************* - * - * Function : free_url - * - * Description : Called from the "unloaders". Freez the url - * structure elements. - * - * Parameters : - * 1 : url = pointer to a url_spec structure. - * - * Returns : N/A - * - *********************************************************************/ -void free_url(struct url_spec *url) -{ - if (url == NULL) return; - - freez(url->spec); - freez(url->domain); - freez(url->dbuf); - freez(url->dvec); - freez(url->path); -#ifdef REGEX - if (url->preg) - { - regfree(url->preg); - freez(url->preg); - } -#endif + return active_threads; } @@ -575,126 +291,447 @@ int check_file_changed(const struct file_list * current, return 0; } - fs = (struct file_list *)zalloc(sizeof(struct file_list)); + fs = zalloc_or_die(sizeof(struct file_list)); + fs->filename = strdup_or_die(filename); + fs->lastmodified = statbuf->st_mtime; - if (fs == NULL) + if (fs->filename == NULL) { /* Out of memory error */ + freez (fs); return 1; } + *newfl = fs; + return 1; +} - fs->filename = strdup(filename); - fs->lastmodified = statbuf->st_mtime; - if (fs->filename == NULL) +/********************************************************************* + * + * Function : simple_read_line + * + * Description : Read a single line from a file and return it. + * This is basically a version of fgets() that malloc()s + * it's own line buffer. Note that the buffer will + * always be a multiple of BUFFER_SIZE bytes long. + * Therefore if you are going to keep the string for + * an extended period of time, you should probably + * strdup() it and free() the original, to save memory. + * + * + * Parameters : + * 1 : dest = destination for newly malloc'd pointer to + * line data. Will be set to NULL on error. + * 2 : fp = File to read from + * 3 : newline = Standard for newlines in the file. + * Will be unchanged if it's value on input is not + * NEWLINE_UNKNOWN. + * On output, may be changed from NEWLINE_UNKNOWN to + * actual convention in file. + * + * Returns : JB_ERR_OK on success + * JB_ERR_MEMORY on out-of-memory + * JB_ERR_FILE on EOF. + * + *********************************************************************/ +jb_err simple_read_line(FILE *fp, char **dest, int *newline) +{ + size_t len = 0; + size_t buflen = BUFFER_SIZE; + char * buf; + char * p; + int ch; + int realnewline = NEWLINE_UNKNOWN; + + if (NULL == (buf = malloc(buflen))) { - /* Out of memory error */ - freez (fs); - return 1; + return JB_ERR_MEMORY; } + p = buf; - *newfl = fs; - return 1; +/* + * Character codes. If you have a weird compiler and the following are + * incorrect, you also need to fix NEWLINE() in loaders.h + */ +#define CHAR_CR '\r' /* ASCII 13 */ +#define CHAR_LF '\n' /* ASCII 10 */ + for (;;) + { + ch = getc(fp); + if (ch == EOF) + { + if (len > 0) + { + *p = '\0'; + *dest = buf; + return JB_ERR_OK; + } + else + { + free(buf); + *dest = NULL; + return JB_ERR_FILE; + } + } + else if (ch == CHAR_CR) + { + ch = getc(fp); + if (ch == CHAR_LF) + { + if (*newline == NEWLINE_UNKNOWN) + { + *newline = NEWLINE_DOS; + } + } + else + { + if (ch != EOF) + { + ungetc(ch, fp); + } + if (*newline == NEWLINE_UNKNOWN) + { + *newline = NEWLINE_MAC; + } + } + *p = '\0'; + *dest = buf; + if (*newline == NEWLINE_UNKNOWN) + { + *newline = realnewline; + } + return JB_ERR_OK; + } + else if (ch == CHAR_LF) + { + *p = '\0'; + *dest = buf; + if (*newline == NEWLINE_UNKNOWN) + { + *newline = NEWLINE_UNIX; + } + return JB_ERR_OK; + } + else if (ch == 0) + { + *p = '\0'; + *dest = buf; + return JB_ERR_OK; + } + + *p++ = (char)ch; + + if (++len >= buflen) + { + buflen += BUFFER_SIZE; + if (NULL == (p = realloc(buf, buflen))) + { + free(buf); + return JB_ERR_MEMORY; + } + buf = p; + p = buf + len; + } + } } /********************************************************************* * - * Function : read_config_line + * Function : edit_read_line * * Description : Read a single non-empty line from a file and return * it. Trims comments, leading and trailing whitespace * and respects escaping of newline and comment char. + * Provides the line in 2 alternative forms: raw and + * preprocessed. + * - raw is the raw data read from the file. If the + * line is not modified, then this should be written + * to the new file. + * - prefix is any comments and blank lines that were + * read from the file. If the line is modified, then + * this should be written out to the file followed + * by the modified data. (If this string is non-empty + * then it will have a newline at the end). + * - data is the actual data that will be parsed + * further by appropriate routines. + * On EOF, the 3 strings will all be set to NULL and + * 0 will be returned. * * Parameters : - * 1 : buf = Buffer to use. - * 2 : buflen = Size of buffer in bytes. - * 3 : fp = File to read from - * 4 : linenum = linenumber in file - * - * Returns : NULL on EOF or error - * Otherwise, returns buf. + * 1 : fp = File to read from + * 2 : raw_out = destination for newly malloc'd pointer to + * raw line data. May be NULL if you don't want it. + * 3 : prefix_out = destination for newly malloc'd pointer to + * comments. May be NULL if you don't want it. + * 4 : data_out = destination for newly malloc'd pointer to + * line data with comments and leading/trailing spaces + * removed, and line continuation performed. May be + * NULL if you don't want it. + * 5 : newline = Standard for newlines in the file. + * On input, set to value to use or NEWLINE_UNKNOWN. + * On output, may be changed from NEWLINE_UNKNOWN to + * actual convention in file. May be NULL if you + * don't want it. + * 6 : line_number = Line number in file. In "lines" as + * reported by a text editor, not lines containing data. + * + * Returns : JB_ERR_OK on success + * JB_ERR_MEMORY on out-of-memory + * JB_ERR_FILE on EOF. * *********************************************************************/ -char *read_config_line(char *buf, int buflen, FILE *fp, unsigned long *linenum) +jb_err edit_read_line(FILE *fp, + char **raw_out, + char **prefix_out, + char **data_out, + int *newline, + unsigned long *line_number) { - char *p; - char *src; - char *dest; - char linebuf[BUFFER_SIZE]; - int contflag = 0; + char *p; /* Temporary pointer */ + char *linebuf; /* Line read from file */ + char *linestart; /* Start of linebuf, usually first non-whitespace char */ + int contflag = 0; /* Nonzero for line continuation - i.e. line ends '\' */ + int is_empty = 1; /* Flag if not got any data yet */ + char *raw = NULL; /* String to be stored in raw_out */ + char *prefix = NULL; /* String to be stored in prefix_out */ + char *data = NULL; /* String to be stored in data_out */ + int scrapnewline; /* Used for (*newline) if newline==NULL */ + jb_err rval = JB_ERR_OK; + + assert(fp); + assert(raw_out || data_out); + assert(newline == NULL + || *newline == NEWLINE_UNKNOWN + || *newline == NEWLINE_UNIX + || *newline == NEWLINE_DOS + || *newline == NEWLINE_MAC); + + if (newline == NULL) + { + scrapnewline = NEWLINE_UNKNOWN; + newline = &scrapnewline; + } - *buf = '\0'; + /* Set output parameters to NULL */ + if (raw_out) + { + *raw_out = NULL; + } + if (prefix_out) + { + *prefix_out = NULL; + } + if (data_out) + { + *data_out = NULL; + } + + /* Set string variables to new, empty strings. */ - while (fgets(linebuf, sizeof(linebuf), fp)) + if (raw_out) + { + raw = strdup_or_die(""); + } + if (prefix_out) { - (*linenum)++; - /* Trim off newline */ - if ((p = strpbrk(linebuf, "\r\n")) != NULL) + prefix = strdup_or_die(""); + } + if (data_out) + { + data = strdup_or_die(""); + } + + /* Main loop. Loop while we need more data & it's not EOF. */ + + while ((contflag || is_empty) + && (JB_ERR_OK == (rval = simple_read_line(fp, &linebuf, newline)))) + { + if (line_number) { - *p = '\0'; + (*line_number)++; } - else + if (raw) { - p = linebuf + strlen(linebuf); + string_append(&raw,linebuf); + if (string_append(&raw,NEWLINE(*newline))) + { + freez(prefix); + freez(data); + free(linebuf); + return JB_ERR_MEMORY; + } } /* Line continuation? Trim escape and set flag. */ - if ((p != linebuf) && (*--p == '\\')) + p = linebuf + strlen(linebuf) - 1; + contflag = ((*linebuf != '\0') && (*p == '\\')); + if (contflag) { - contflag = 1; *p = '\0'; } - /* If there's a comment char.. */ - p = linebuf; + /* Trim leading spaces if we're at the start of the line */ + linestart = linebuf; + assert(NULL != data); + if (*data == '\0') + { + /* Trim leading spaces */ + while (*linestart && isspace((int)(unsigned char)*linestart)) + { + linestart++; + } + } + + /* Handle comment characters. */ + p = linestart; while ((p = strchr(p, '#')) != NULL) { - /* ..and it's escaped, left-shift the line over the escape. */ + /* Found a comment char.. */ if ((p != linebuf) && (*(p-1) == '\\')) { - src = p; - dest = p - 1; - while ((*dest++ = *src++) != '\0') + /* ..and it's escaped, left-shift the line over the escape. */ + char *q = p - 1; + while ((*q = *(q + 1)) != '\0') { - /* nop */ + q++; } /* Now scan from just after the "#". */ } - /* Else, chop off the rest of the line */ else { + /* Real comment. Save it... */ + if (p == linestart) + { + /* Special case: Line only contains a comment, so all the + * previous whitespace is considered part of the comment. + * Undo the whitespace skipping, if any. + */ + linestart = linebuf; + p = linestart; + } + if (prefix) + { + string_append(&prefix,p); + if (string_append(&prefix, NEWLINE(*newline))) + { + freez(raw); + freez(data); + free(linebuf); + return JB_ERR_MEMORY; + } + } + + /* ... and chop off the rest of the line */ *p = '\0'; } - } + } /* END while (there's a # character) */ /* Write to the buffer */ - if (*linebuf) + if (*linestart) { - strncat(buf, linebuf, buflen - strlen(buf)); + is_empty = 0; + if (string_append(&data, linestart)) + { + freez(raw); + freez(prefix); + free(linebuf); + return JB_ERR_MEMORY; + } } - /* Continue? */ - if (contflag) - { - contflag = 0; - continue; - } + free(linebuf); + } /* END while(we need more data) */ + + /* Handle simple_read_line() errors - ignore EOF */ + if ((rval != JB_ERR_OK) && (rval != JB_ERR_FILE)) + { + freez(raw); + freez(prefix); + freez(data); + return rval; + } + + if (raw ? (*raw == '\0') : is_empty) + { + /* EOF and no data there. (Definition of "data" depends on whether + * the caller cares about "raw" or just "data"). + */ + + freez(raw); + freez(prefix); + freez(data); + + return JB_ERR_FILE; + } + else + { + /* Got at least some data */ - /* Remove leading and trailing whitespace */ - chomp(buf); + /* Remove trailing whitespace */ + chomp(data); - if (*buf) + if (raw_out) + { + *raw_out = raw; + } + else + { + freez(raw); + } + if (prefix_out) + { + *prefix_out = prefix; + } + else + { + freez(prefix); + } + if (data_out) + { + *data_out = data; + } + else { - return buf; + freez(data); } + return JB_ERR_OK; } +} - /* EOF */ - return NULL; +/********************************************************************* + * + * Function : read_config_line + * + * Description : Read a single non-empty line from a file and return + * it. Trims comments, leading and trailing whitespace + * and respects escaping of newline and comment char. + * + * Parameters : + * 1 : fp = File to read from + * 2 : linenum = linenumber in file + * 3 : buf = Pointer to a pointer to set to the data buffer. + * + * Returns : NULL on EOF or error + * Otherwise, returns buf. + * + *********************************************************************/ +char *read_config_line(FILE *fp, unsigned long *linenum, char **buf) +{ + jb_err err; + err = edit_read_line(fp, NULL, NULL, buf, NULL, linenum); + if (err) + { + if (err == JB_ERR_MEMORY) + { + log_error(LOG_LEVEL_FATAL, "Out of memory loading a config file"); + } + *buf = NULL; + } + return *buf; } @@ -713,18 +750,46 @@ char *read_config_line(char *buf, int buflen, FILE *fp, unsigned long *linenum) *********************************************************************/ static void unload_trustfile(void *f) { - struct block_spec *b = (struct block_spec *)f; - if (b == NULL) return; + struct block_spec *cur = (struct block_spec *)f; + struct block_spec *next; - unload_trustfile(b->next); /* Stack is cheap, isn't it? */ + while (cur != NULL) + { + next = cur->next; - free_url(b->url); + free_pattern_spec(cur->url); + free(cur); - freez(b); + cur = next; + } } +#ifdef FEATURE_GRACEFUL_TERMINATION +/********************************************************************* + * + * Function : unload_current_trust_file + * + * Description : Unloads current trust file - reset to state at + * beginning of program. + * + * Parameters : None + * + * Returns : N/A + * + *********************************************************************/ +void unload_current_trust_file(void) +{ + if (current_trustfile) + { + current_trustfile->unloader = unload_trustfile; + current_trustfile = NULL; + } +} +#endif /* FEATURE_GRACEFUL_TERMINATION */ + + /********************************************************************* * * Function : load_trustfile @@ -742,20 +807,18 @@ int load_trustfile(struct client_state *csp) FILE *fp; struct block_spec *b, *bl; - struct url_spec **tl; + struct pattern_spec **tl; - char buf[BUFFER_SIZE], *p, *q; + char *buf = NULL; int reject, trusted; struct file_list *fs; unsigned long linenum = 0; + int trusted_referrers = 0; if (!check_file_changed(current_trustfile, csp->config->trustfile, &fs)) { /* No need to load */ - if (csp) - { - csp->tlist = current_trustfile; - } + csp->tlist = current_trustfile; return(0); } if (!fs) @@ -763,20 +826,17 @@ int load_trustfile(struct client_state *csp) goto load_trustfile_error; } - fs->f = bl = (struct block_spec *)zalloc(sizeof(*bl)); - if (bl == NULL) - { - goto load_trustfile_error; - } + fs->f = bl = zalloc_or_die(sizeof(*bl)); if ((fp = fopen(csp->config->trustfile, "r")) == NULL) { goto load_trustfile_error; } + log_error(LOG_LEVEL_INFO, "Loading trust file: %s", csp->config->trustfile); tl = csp->config->trust_list; - while (read_config_line(buf, sizeof(buf), fp, &linenum) != NULL) + while (read_config_line(fp, &linenum, &buf) != NULL) { trusted = 0; reject = 1; @@ -789,10 +849,13 @@ int load_trustfile(struct client_state *csp) if (*buf == '~') { + char *p; + char *q; + reject = 0; p = buf; q = p+1; - while ((*p++ = *q++)) + while ((*p++ = *q++) != '\0') { /* nop */ } @@ -801,15 +864,12 @@ int load_trustfile(struct client_state *csp) /* skip blank lines */ if (*buf == '\0') { + freez(buf); continue; } /* allocate a new node */ - if ((b = zalloc(sizeof(*b))) == NULL) - { - fclose(fp); - goto load_trustfile_error; - } + b = zalloc_or_die(sizeof(*b)); /* add it to the list */ b->next = bl->next; @@ -818,7 +878,7 @@ int load_trustfile(struct client_state *csp) b->reject = reject; /* Save the URL pattern */ - if (create_url_spec(b->url, buf)) + if (create_pattern_spec(b->url, buf)) { fclose(fp); goto load_trustfile_error; @@ -829,8 +889,23 @@ int load_trustfile(struct client_state *csp) */ if (trusted) { - *tl++ = b->url; + if (++trusted_referrers < MAX_TRUSTED_REFERRERS) + { + *tl++ = b->url; + } } + freez(buf); + } + + if (trusted_referrers >= MAX_TRUSTED_REFERRERS) + { + /* + * FIXME: ... after Privoxy 3.0.4 is out. + */ + log_error(LOG_LEVEL_ERROR, "Too many trusted referrers. Current limit is %d, you are using %d.\n" + " Additional trusted referrers are treated like ordinary trusted URLs.\n" + " (You can increase this limit by changing MAX_TRUSTED_REFERRERS in project.h and recompiling).", + MAX_TRUSTED_REFERRERS, trusted_referrers); } *tl = NULL; @@ -846,17 +921,14 @@ int load_trustfile(struct client_state *csp) fs->next = files->next; files->next = fs; current_trustfile = fs; - - if (csp) - { - csp->tlist = fs; - } + csp->tlist = fs; return(0); load_trustfile_error: log_error(LOG_LEVEL_FATAL, "can't load trustfile '%s': %E", - csp->config->trustfile); + csp->config->trustfile); + freez(buf); return(-1); } @@ -867,7 +939,8 @@ load_trustfile_error: * * Function : unload_re_filterfile * - * Description : Unload the re_filter list. + * Description : Unload the re_filter list by freeing all chained + * re_filterfile specs and their data. * * Parameters : * 1 : f = the data structure associated with the filterfile. @@ -877,27 +950,127 @@ load_trustfile_error: *********************************************************************/ static void unload_re_filterfile(void *f) { - struct re_filterfile_spec *b = (struct re_filterfile_spec *)f; + struct re_filterfile_spec *a, *b = (struct re_filterfile_spec *)f; - if (b == NULL) + while (b != NULL) { - return; + a = b->next; + + destroy_list(b->patterns); + pcrs_free_joblist(b->joblist); + freez(b->name); + freez(b->description); + freez(b); + + b = a; } - destroy_list(b->patterns); - pcrs_free_joblist(b->joblist); - freez(b); + return; +} + +/********************************************************************* + * + * Function : unload_forward_spec + * + * Description : Unload the forward spec settings by freeing all + * memory referenced by members and the memory for + * the spec itself. + * + * Parameters : + * 1 : fwd = the forward spec. + * + * Returns : N/A + * + *********************************************************************/ +void unload_forward_spec(struct forward_spec *fwd) +{ + free_pattern_spec(fwd->url); + freez(fwd->gateway_host); + freez(fwd->forward_host); + free(fwd); return; } + +#ifdef FEATURE_GRACEFUL_TERMINATION +/********************************************************************* + * + * Function : unload_current_re_filterfile + * + * Description : Unloads current re_filter file - reset to state at + * beginning of program. + * + * Parameters : None + * + * Returns : N/A + * + *********************************************************************/ +void unload_current_re_filterfile(void) +{ + int i; + + for (i = 0; i < MAX_AF_FILES; i++) + { + if (current_re_filterfile[i]) + { + current_re_filterfile[i]->unloader = unload_re_filterfile; + current_re_filterfile[i] = NULL; + } + } +} +#endif + + +/********************************************************************* + * + * Function : load_re_filterfiles + * + * Description : Loads all the filterfiles. + * Generate a chained list of re_filterfile_spec's from + * the "FILTER: " blocks, compiling all their substitutions + * into chained lists of pcrs_job structs. + * + * Parameters : + * 1 : csp = Current client state (buffers, headers, etc...) + * + * Returns : 0 => Ok, everything else is an error. + * + *********************************************************************/ +int load_re_filterfiles(struct client_state *csp) +{ + int i; + int result; + + for (i = 0; i < MAX_AF_FILES; i++) + { + if (csp->config->re_filterfile[i]) + { + result = load_one_re_filterfile(csp, i); + if (result) + { + return result; + } + } + else if (current_re_filterfile[i]) + { + current_re_filterfile[i]->unloader = unload_re_filterfile; + current_re_filterfile[i] = NULL; + } + } + + return 0; +} + + /********************************************************************* * - * Function : load_re_filterfile + * Function : load_one_re_filterfile * - * Description : Load the re_filterfile. Each non-comment, non-empty - * line is instantly added to the joblist, which is - * a chained list of pcrs_job structs. + * Description : Load a re_filterfile. + * Generate a chained list of re_filterfile_spec's from + * the "FILTER: " blocks, compiling all their substitutions + * into chained lists of pcrs_job structs. * * Parameters : * 1 : csp = Current client state (buffers, headers, etc...) @@ -905,25 +1078,24 @@ static void unload_re_filterfile(void *f) * Returns : 0 => Ok, everything else is an error. * *********************************************************************/ -int load_re_filterfile(struct client_state *csp) +int load_one_re_filterfile(struct client_state *csp, int fileid) { FILE *fp; - struct re_filterfile_spec *bl; + struct re_filterfile_spec *new_bl, *bl = NULL; struct file_list *fs; - char buf[BUFFER_SIZE]; + char *buf = NULL; int error; unsigned long linenum = 0; - pcrs_job *dummy; + pcrs_job *dummy, *lastjob = NULL; - if (!check_file_changed(current_re_filterfile, csp->config->re_filterfile, &fs)) + /* + * No need to reload if unchanged + */ + if (!check_file_changed(current_re_filterfile[fileid], csp->config->re_filterfile[fileid], &fs)) { - /* No need to load */ - if (csp) - { - csp->rlist = current_re_filterfile; - } + csp->rlist[fileid] = current_re_filterfile[fileid]; return(0); } if (!fs) @@ -931,60 +1103,234 @@ int load_re_filterfile(struct client_state *csp) goto load_re_filterfile_error; } - fs->f = bl = (struct re_filterfile_spec *)zalloc(sizeof(*bl)); - if (bl == NULL) + /* + * Open the file or fail + */ + if ((fp = fopen(csp->config->re_filterfile[fileid], "r")) == NULL) { goto load_re_filterfile_error; } - /* Open the file or fail */ - if ((fp = fopen(csp->config->re_filterfile, "r")) == NULL) - { - goto load_re_filterfile_error; - } + log_error(LOG_LEVEL_INFO, "Loading filter file: %s", csp->config->re_filterfile[fileid]); - /* Read line by line */ - while (read_config_line(buf, sizeof(buf), fp, &linenum) != NULL) + /* + * Read line by line + */ + while (read_config_line(fp, &linenum, &buf) != NULL) { - enlist( bl->patterns, buf ); + enum filter_type new_filter = FT_INVALID_FILTER; + + if (strncmp(buf, "FILTER:", 7) == 0) + { + new_filter = FT_CONTENT_FILTER; + } + else if (strncmp(buf, "SERVER-HEADER-FILTER:", 21) == 0) + { + new_filter = FT_SERVER_HEADER_FILTER; + } + else if (strncmp(buf, "CLIENT-HEADER-FILTER:", 21) == 0) + { + new_filter = FT_CLIENT_HEADER_FILTER; + } + else if (strncmp(buf, "CLIENT-HEADER-TAGGER:", 21) == 0) + { + new_filter = FT_CLIENT_HEADER_TAGGER; + } + else if (strncmp(buf, "SERVER-HEADER-TAGGER:", 21) == 0) + { + new_filter = FT_SERVER_HEADER_TAGGER; + } +#ifdef FEATURE_EXTERNAL_FILTERS + else if (strncmp(buf, "EXTERNAL-FILTER:", 16) == 0) + { + new_filter = FT_EXTERNAL_CONTENT_FILTER; + } +#endif + + /* + * If this is the head of a new filter block, make it a + * re_filterfile spec of its own and chain it to the list: + */ + if (new_filter != FT_INVALID_FILTER) + { + new_bl = zalloc_or_die(sizeof(*bl)); + if (new_filter == FT_CONTENT_FILTER) + { + new_bl->name = chomp(buf + 7); + } +#ifdef FEATURE_EXTERNAL_FILTERS + else if (new_filter == FT_EXTERNAL_CONTENT_FILTER) + { + new_bl->name = chomp(buf + 16); + } +#endif + else + { + new_bl->name = chomp(buf + 21); + } + new_bl->type = new_filter; + + /* + * If a filter description is available, + * encode it to HTML and save it. + */ + if (NULL != (new_bl->description = strpbrk(new_bl->name, " \t"))) + { + *new_bl->description++ = '\0'; + new_bl->description = html_encode(chomp(new_bl->description)); + if (NULL == new_bl->description) + { + new_bl->description = strdup_or_die("Out of memory while " + "encoding filter description to HTML"); + } + } + else + { + new_bl->description = strdup_or_die("No description available"); + } + + new_bl->name = strdup_or_die(chomp(new_bl->name)); + + /* + * If this is the first filter block, chain it + * to the file_list rather than its (nonexistant) + * predecessor + */ + if (fs->f == NULL) + { + fs->f = new_bl; + } + else + { + assert(NULL != bl); + bl->next = new_bl; + } + bl = new_bl; + + log_error(LOG_LEVEL_RE_FILTER, "Reading in filter \"%s\" (\"%s\")", bl->name, bl->description); - /* We have a meaningful line -> make it a job */ - if ((dummy = pcrs_compile_command(buf, &error)) == NULL) + freez(buf); + continue; + } + +#ifdef FEATURE_EXTERNAL_FILTERS + if ((bl != NULL) && (bl->type == FT_EXTERNAL_CONTENT_FILTER)) { - log_error(LOG_LEVEL_RE_FILTER, - "Adding re_filter job %s failed with error %d.", buf, error); + /* Save the code as "pattern", but do not compile anything. */ + if (bl->patterns->first != NULL) + { + log_error(LOG_LEVEL_FATAL, "External filter '%s' contains several jobss. " + "Did you forget to escape a line break?", + bl->name); + } + error = enlist(bl->patterns, buf); + if (JB_ERR_MEMORY == error) + { + log_error(LOG_LEVEL_FATAL, + "Out of memory while enlisting external filter code \'%s\' for filter %s.", + buf, bl->name); + } + freez(buf); continue; } +#endif + if (bl != NULL) + { + /* + * Save the expression, make it a pcrs_job + * and chain it into the current filter's joblist + */ + error = enlist(bl->patterns, buf); + if (JB_ERR_MEMORY == error) + { + log_error(LOG_LEVEL_FATAL, + "Out of memory while enlisting re_filter job \'%s\' for filter %s.", buf, bl->name); + } + assert(JB_ERR_OK == error); + + if (pcrs_job_is_dynamic(buf)) + { + /* + * Dynamic pattern that might contain variables + * and has to be recompiled for every request + */ + if (bl->joblist != NULL) + { + pcrs_free_joblist(bl->joblist); + bl->joblist = NULL; + } + bl->dynamic = 1; + log_error(LOG_LEVEL_RE_FILTER, + "Adding dynamic re_filter job \'%s\' to filter %s succeeded.", buf, bl->name); + freez(buf); + continue; + } + else if (bl->dynamic) + { + /* + * A previous job was dynamic and as we + * recompile the whole filter anyway, it + * makes no sense to compile this job now. + */ + log_error(LOG_LEVEL_RE_FILTER, + "Adding static re_filter job \'%s\' to dynamic filter %s succeeded.", buf, bl->name); + freez(buf); + continue; + } + + if ((dummy = pcrs_compile_command(buf, &error)) == NULL) + { + log_error(LOG_LEVEL_ERROR, + "Adding re_filter job \'%s\' to filter %s failed: %s", + buf, bl->name, pcrs_strerror(error)); + freez(buf); + continue; + } + else + { + if (bl->joblist == NULL) + { + bl->joblist = dummy; + } + else if (NULL != lastjob) + { + lastjob->next = dummy; + } + lastjob = dummy; + log_error(LOG_LEVEL_RE_FILTER, "Adding re_filter job \'%s\' to filter %s succeeded.", buf, bl->name); + } + } else { - dummy->next = bl->joblist; - bl->joblist = dummy; - log_error(LOG_LEVEL_RE_FILTER, "Adding re_filter job %s succeeded.", buf); + log_error(LOG_LEVEL_ERROR, "Ignoring job %s outside filter block in %s, line %d", + buf, csp->config->re_filterfile[fileid], linenum); } + freez(buf); } fclose(fp); - /* the old one is now obsolete */ - if ( NULL != current_re_filterfile ) + /* + * Schedule the now-obsolete old data for unloading + */ + if (NULL != current_re_filterfile[fileid]) { - current_re_filterfile->unloader = unload_re_filterfile; + current_re_filterfile[fileid]->unloader = unload_re_filterfile; } + /* + * Chain this file into the global list of loaded files + */ fs->next = files->next; files->next = fs; - current_re_filterfile = fs; - - if (csp) - { - csp->rlist = fs; - } + current_re_filterfile[fileid] = fs; + csp->rlist[fileid] = fs; - return( 0 ); + return(0); load_re_filterfile_error: log_error(LOG_LEVEL_FATAL, "can't load re_filterfile '%s': %E", - csp->config->re_filterfile); + csp->config->re_filterfile[fileid]); return(-1); } @@ -1010,7 +1356,7 @@ void add_loader(int (*loader)(struct client_state *), { int i; - for (i=0; i < NLOADERS; i++) + for (i = 0; i < NLOADERS; i++) { if (config->loaders[i] == NULL) { @@ -1044,7 +1390,7 @@ int run_loader(struct client_state *csp) int ret = 0; int i; - for (i=0; i < NLOADERS; i++) + for (i = 0; i < NLOADERS; i++) { if (csp->config->loaders[i] == NULL) { @@ -1056,6 +1402,98 @@ int run_loader(struct client_state *csp) } +/********************************************************************* + * + * Function : file_has_been_modified + * + * Description : Helper function to check if a file has been changed + * + * Parameters : + * 1 : filename = The name of the file to check + * 2 : last_known_modification = The time of the last known + * modification + * + * Returns : TRUE if the file has been changed, + * FALSE otherwise. + * + *********************************************************************/ +static int file_has_been_modified(const char *filename, time_t last_know_modification) +{ + struct stat statbuf[1]; + + if (stat(filename, statbuf) < 0) + { + /* Error, probably file not found which counts as change. */ + return 1; + } + + return (last_know_modification != statbuf->st_mtime); +} + + +/********************************************************************* + * + * Function : any_loaded_file_changed + * + * Description : Helper function to check if any loaded file has been + * changed since the time it has been loaded. + * + * XXX: Should we cache the return value for x seconds? + * + * Parameters : + * 1 : files_to_check = List of files to check + * + * Returns : TRUE if any file has been changed, + * FALSE otherwise. + * + *********************************************************************/ +int any_loaded_file_changed(const struct client_state *csp) +{ + const struct file_list *file_to_check = csp->config->config_file_list; + int i; + + if (file_has_been_modified(file_to_check->filename, file_to_check->lastmodified)) + { + return TRUE; + } + + for (i = 0; i < MAX_AF_FILES; i++) + { + if (csp->actions_list[i]) + { + file_to_check = csp->actions_list[i]; + if (file_has_been_modified(file_to_check->filename, file_to_check->lastmodified)) + { + return TRUE; + } + } + } + + for (i = 0; i < MAX_AF_FILES; i++) + { + if (csp->rlist[i]) + { + file_to_check = csp->rlist[i]; + if (file_has_been_modified(file_to_check->filename, file_to_check->lastmodified)) + { + return TRUE; + } + } + } + +#ifdef FEATURE_TRUST + if (csp->tlist) + { + if (file_has_been_modified(csp->tlist->filename, csp->tlist->lastmodified)) + { + return TRUE; + } + } +#endif /* def FEATURE_TRUST */ + + return FALSE; +} + /* Local Variables: