+ while (ijb_isspace(*q))
+ {
+ q++;
+ }
+ log_error(LOG_LEVEL_HEADER, "Reducing white space in '%s'", header);
+ string_move(p+1, q);
+ }
+
+ if (*p == '\t')
+ {
+ log_error(LOG_LEVEL_HEADER,
+ "Converting tab to space in '%s'", header);
+ *p = ' ';
+ }
+ else if (*p == '"')
+ {
+ char *end_of_token = strstr(p+1, "\"");
+
+ if (NULL != end_of_token)
+ {
+ /* Don't mess with quoted text. */
+ p = end_of_token;
+ }
+ else
+ {
+ log_error(LOG_LEVEL_HEADER,
+ "Ignoring single quote in '%s'", header);
+ }
+ }
+ p++;
+ }
+
+ p = strchr(header, ':');
+ if ((p != NULL) && (p != header) && ijb_isspace(*(p-1)))
+ {
+ /*
+ * There's still space before the colon.
+ * We don't want it.
+ */
+ string_move(p-1, p);
+ }
+}
+
+
+/*********************************************************************
+ *
+ * Function    :  get_header
+ *
+ * Description :  Fetches the next complete header from the I/O buffer.
+ *                Lines following the first one that begin with a
+ *                space or a tab are treated as continuations and are
+ *                appended to it. The merged header is then handed to
+ *                normalize_lws().
+ *
+ * Parameters  :
+ *          1  :  iob = The I/O buffer to parse, usually csp->iob.
+ *
+ * Returns     :  Any one of the following:
+ *
+ * 1) a pointer to a dynamically allocated string that contains a header line
+ * 2) NULL  indicating that the end of the header was reached
+ * 3) ""    indicating that the end of the iob was reached before finding
+ *          a complete header line.
+ *
+ *********************************************************************/
+char *get_header(struct iob *iob)
+{
+   char *header = get_header_line(iob);
+
+   if ((header == NULL) || (*header == '\0'))
+   {
+      /*
+       * Either the end of the headers (NULL) or an incomplete
+       * line (""). Both are passed on to the caller unchanged.
+       */
+      return header;
+   }
+
+   for (;;)
+   {
+      char *continuation;
+
+      if ((iob->cur[0] != ' ') && (iob->cur[0] != '\t'))
+      {
+         /* The next line doesn't continue this header. */
+         break;
+      }
+
+      continuation = get_header_line(iob);
+      if ((continuation == NULL) || (*continuation == '\0'))
+      {
+         /*
+          * The continuation line isn't complete yet,
+          * settle for what has been collected so far.
+          * XXX: Should "unread" header instead.
+          */
+         log_error(LOG_LEVEL_INFO,
+            "Failed to read a multi-line header properly: '%s'",
+            header);
+         break;
+      }
+
+      if (JB_ERR_OK == string_join(&header, continuation))
+      {
+         /* XXX: remove before next stable release. */
+         log_error(LOG_LEVEL_HEADER,
+            "Merged multiple header lines to: '%s'",
+            header);
+      }
+      else
+      {
+         log_error(LOG_LEVEL_FATAL,
+            "Out of memory while appending multiple headers.");
+      }
+   }
+
+   normalize_lws(header);
+
+   return header;
+
+}
+
+
+/*********************************************************************
+ *
+ * Function    :  get_header_line
+ *
+ * Description :  Cuts the next line out of the I/O buffer. The line
+ *                feed that ends the line is overwritten with a NUL
+ *                byte in the buffer, iob->cur is advanced past it and
+ *                a copy of the line, truncated at the first carriage
+ *                return, is handed back.
+ *
+ * Parameters  :
+ *          1  :  iob = The I/O buffer to parse, usually csp->iob.
+ *
+ * Returns     :  Any one of the following:
+ *
+ * 1) a pointer to a dynamically allocated string that contains a header line
+ * 2) NULL  indicating that the end of the header was reached
+ * 3) ""    indicating that the end of the iob was reached before finding
+ *          a complete header line.
+ *
+ *********************************************************************/
+static char *get_header_line(struct iob *iob)
+{
+   char *line_feed;
+   char *carriage_return;
+   char *line;
+
+   if (iob->cur == NULL)
+   {
+      /* Nothing buffered, so there can't be a complete line. */
+      return("");
+   }
+
+   line_feed = strchr(iob->cur, '\n');
+   if (line_feed == NULL)
+   {
+      /* The line hasn't been terminated yet. */
+      return("");
+   }
+
+   /* Terminate the line in place so it can be duplicated. */
+   *line_feed = '\0';
+
+   line = strdup(iob->cur);
+   if (line == NULL)
+   {
+      /* FIXME No way to handle error properly */
+      log_error(LOG_LEVEL_FATAL, "Out of memory in get_header_line()");
+   }
+
+   /* The next call starts behind the line feed. */
+   iob->cur = line_feed + 1;
+
+   carriage_return = strchr(line, '\r');
+   if (carriage_return != NULL)
+   {
+      *carriage_return = '\0';
+   }
+
+   if (*line == '\0')
+   {
+      /* A blank line marks the end of the header. */
+      freez(line);
+      return(NULL);
+   }
+
+   return(line);
+
+}
+
+
+/*********************************************************************
+ *
+ * Function    :  get_header_value
+ *
+ * Description :  Looks through a chained list of header lines for
+ *                the first line that starts with the given header
+ *                name and returns its value.
+ *
+ * Parameters  :
+ *          1  :  header_list = pointer to list
+ *          2  :  header_name = string with name of header to look for.
+ *                              Trailing colon required, capitalization
+ *                              doesn't matter.
+ *
+ * Returns     :  NULL if not found, else a pointer into the stored
+ *                header line, positioned behind the header name and
+ *                any white space that follows it.
+ *
+ *********************************************************************/
+char *get_header_value(const struct list *header_list, const char *header_name)
+{
+   struct list_entry *entry;
+   size_t name_length;
+
+   assert(header_list);
+   assert(header_name);
+   name_length = strlen(header_name);
+
+   for (entry = header_list->first; entry != NULL; entry = entry->next)
+   {
+      char *value;
+
+      if (!entry->str)
+      {
+         /* Entry without content, nothing to compare. */
+         continue;
+      }
+
+      if (strncmpic(entry->str, header_name, name_length))
+      {
+         /* Some other header. */
+         continue;
+      }
+
+      /* Match: the value starts behind the name, skip leading white space. */
+      value = (char *) (entry->str + name_length);
+      while ((*value != '\0') && ijb_isspace(*value))
+      {
+         value++;
+      }
+      return(value);
+   }
+
+   /*
+    * Not found
+    */
+   return NULL;
+
+}
+
+
+/*********************************************************************
+ *
+ * Function    :  scan_headers
+ *
+ * Description :  Walks csp->headers, logs every header that still has
+ *                content and passes it to header_tagger(). The walk
+ *                stops as soon as header_tagger() reports an error.
+ *
+ * Parameters  :
+ *          1  :  csp = Current client state (buffers, headers, etc...)
+ *
+ * Returns     :  JB_ERR_OK if no header was tagged, else the result
+ *                of the last header_tagger() call.
+ *
+ *********************************************************************/
+static jb_err scan_headers(struct client_state *csp)
+{
+   jb_err err = JB_ERR_OK;
+   struct list_entry *header = csp->headers->first;
+
+   while ((err == JB_ERR_OK) && (header != NULL))
+   {
+      /* Headers crunch()ed in a previous run have no content left. */
+      if (header->str != NULL)
+      {
+         log_error(LOG_LEVEL_HEADER, "scan: %s", header->str);
+         err = header_tagger(csp, header->str);
+      }
+      header = header->next;
+   }
+
+   return err;
+}
+
+
+/*********************************************************************
+ *
+ * Function    :  sed
+ *
+ * Description :  add, delete or modify lines in the HTTP header streams.
+ *                On entry, it receives a linked list of headers space
+ *                that was allocated dynamically (both the list nodes
+ *                and the header contents).
+ *
+ *                The headers are scanned for tags first. Afterwards
+ *                every parser of the selected table is run on each
+ *                header it is responsible for and finally the
+ *                functions that add headers are called.
+ *
+ *                As a side effect it frees the space used by the original
+ *                header lines.
+ *
+ * Parameters  :
+ *          1  :  csp = Current client state (buffers, headers, etc...)
+ *          2  :  filter_server_headers = Non-zero to use the server
+ *                parser and add-header tables, zero to use the
+ *                client ones.
+ *
+ * Returns     :  JB_ERR_OK in case of success, or the first error
+ *                reported by a parser or header-adding function
+ *                (e.g. JB_ERR_MEMORY on out-of-memory error).
+ *
+ *********************************************************************/
+jb_err sed(struct client_state *csp, int filter_server_headers)
+{
+   const struct parsers *parser_entry;
+   const add_header_func_ptr *header_adder;
+   struct list_entry *header;
+   jb_err err = JB_ERR_OK;
+
+   parser_entry = filter_server_headers ? server_patterns : client_patterns;
+   header_adder = filter_server_headers ? add_server_headers : add_client_headers;
+
+   scan_headers(csp);
+
+   /* The parser table ends with an entry whose str is NULL. */
+   for (; (err == JB_ERR_OK) && (parser_entry->str != NULL); parser_entry++)
+   {
+      header = csp->headers->first;
+      while ((err == JB_ERR_OK) && (header != NULL))
+      {
+         /*
+          * Headers crunch()ed in a previous run are skipped, the
+          * others are parsed if the current parser either handles
+          * them or wants to see every remaining header.
+          */
+         if ((header->str != NULL) &&
+             ((strncmpic(header->str, parser_entry->str, parser_entry->len) == 0) ||
+              (parser_entry->len == CHECK_EVERY_HEADER_REMAINING)))
+         {
+            err = parser_entry->parser(csp, &(header->str));
+         }
+         header = header->next;
+      }
+   }
+
+   /* place additional headers on the csp->headers list */
+   for (; (err == JB_ERR_OK) && (*header_adder != NULL); header_adder++)
+   {
+      err = (*header_adder)(csp);
+   }
+
+   return err;
+}
+
+
+/*********************************************************************
+ *
+ * Function    :  update_server_headers
+ *
+ * Description :  Updates server headers after the body has been modified.
+ *                Only a reduced set of parsers is run and nothing is
+ *                done if the request command starts with "HEAD".
+ *
+ * Parameters  :
+ *          1  :  csp = Current client state (buffers, headers, etc...)
+ *
+ * Returns     :  JB_ERR_OK in case of success, or the first error
+ *                reported by a parser (e.g. JB_ERR_MEMORY on
+ *                out-of-memory error).
+ *
+ *********************************************************************/
+jb_err update_server_headers(struct client_state *csp)
+{
+   static const struct parsers server_patterns_light[] = {
+      { "Content-Length:",    15, server_content_length },
+      { "Transfer-Encoding:", 18, server_transfer_coding },
+#ifdef FEATURE_ZLIB
+      { "Content-Encoding:",  17, server_content_encoding },
+#endif /* def FEATURE_ZLIB */
+      { NULL, 0, NULL }
+   };
+
+   const struct parsers *parser_entry = server_patterns_light;
+   jb_err err = JB_ERR_OK;
+
+   if (strncmpic(csp->http->cmd, "HEAD", 4) == 0)
+   {
+      /* HEAD request, the headers are left alone. */
+      return err;
+   }
+
+   while ((err == JB_ERR_OK) && (parser_entry->str != NULL))
+   {
+      struct list_entry *header = csp->headers->first;
+
+      while ((err == JB_ERR_OK) && (header != NULL))
+      {
+         /*
+          * Headers crunch()ed in a previous run are skipped, the
+          * others are parsed if the current parser handles them.
+          */
+         if ((header->str != NULL) &&
+             (strncmpic(header->str, parser_entry->str, parser_entry->len) == 0))
+         {
+            err = parser_entry->parser(csp, (char **)&(header->str));
+         }
+         header = header->next;
+      }
+      parser_entry++;
+   }
+
+   return err;
+}
+
+
+/*********************************************************************
+ *
+ * Function    :  header_tagger
+ *
+ * Description :  Executes all text substitutions from applying
+ *                tag actions and saves the result as tag.
+ *
+ *                Every tagger of the wanted type whose name is listed
+ *                in the matching multi-value action is run on a copy
+ *                of the header. If at least one of its jobs matched,
+ *                the result is added to csp->tags unless it is empty
+ *                or already present, and the action bits are updated
+ *                for the new tag. The header itself is not modified
+ *                here.
+ *
+ *                XXX: Shares enough code with filter_header() and
+ *                pcrs_filter_response() to warrant some helper functions.
+ *
+ * Parameters  :
+ *          1  :  csp = Current client state (buffers, headers, etc...)
+ *          2  :  header = Header that is used as tagger input
+ *
+ * Returns     :  JB_ERR_OK on success and always succeeds
+ *
+ *********************************************************************/
+static jb_err header_tagger(struct client_state *csp, char *header)
+{
+   int wanted_filter_type;
+   int multi_action_index;
+   int i;
+   pcrs_job *job;
+
+   struct file_list *fl;
+   struct re_filterfile_spec *b;
+   struct list_entry *tag_name;
+
+   int found_filters = 0;
+   const size_t header_length = strlen(header);
+
+   /*
+    * Once client header parsing is flagged as done, the
+    * server header taggers are the ones to use.
+    */
+   if (csp->flags & CSP_FLAG_CLIENT_HEADER_PARSING_DONE)
+   {
+      wanted_filter_type = FT_SERVER_HEADER_TAGGER;
+      multi_action_index = ACTION_MULTI_SERVER_HEADER_TAGGER;
+   }
+   else
+   {
+      wanted_filter_type = FT_CLIENT_HEADER_TAGGER;
+      multi_action_index = ACTION_MULTI_CLIENT_HEADER_TAGGER;
+   }
+
+   /* Check if there are any filters */
+   for (i = 0; i < MAX_AF_FILES; i++)
+   {
+      fl = csp->rlist[i];
+      if (NULL != fl)
+      {
+         if (NULL != fl->f)
+         {
+            found_filters = 1;
+            break;
+         }
+      }
+   }
+
+   if (0 == found_filters)
+   {
+      log_error(LOG_LEVEL_ERROR, "Inconsistent configuration: "
+         "tagging enabled, but no taggers available.");
+      return(JB_ERR_OK);
+   }
+
+   for (i = 0; i < MAX_AF_FILES; i++)
+   {
+      fl = csp->rlist[i];
+      if ((NULL == fl) || (NULL == fl->f))
+      {
+         /*
+          * Either there are no filter files
+          * left, or this filter file just
+          * contains no valid filters.
+          *
+          * Continue to be sure we don't miss
+          * valid filter files that are chained
+          * after empty or invalid ones.
+          */
+         continue;
+      }
+
+      /* For all filters, */
+      for (b = fl->f; b; b = b->next)
+      {
+         if (b->type != wanted_filter_type)
+         {
+            /* skip the ones we don't care about, */
+            continue;
+         }
+         /* leaving only taggers that could apply, of which we use the ones, */
+         for (tag_name = csp->action->multi[multi_action_index]->first;
+              NULL != tag_name; tag_name = tag_name->next)
+         {
+            /* that do apply, and */
+            if (strcmp(b->name, tag_name->str) == 0)
+            {
+               char *modified_tag = NULL;
+               /*
+                * tag points to the header itself until a job
+                * matches; from then on it owns allocated memory.
+                */
+               char *tag = header;
+               size_t size = header_length;
+               pcrs_job *joblist = b->joblist;
+
+               if (b->dynamic) joblist = compile_dynamic_pcrs_job_list(csp, b);
+
+               if (NULL == joblist)
+               {
+                  log_error(LOG_LEVEL_RE_FILTER,
+                     "Tagger %s has empty joblist. Nothing to do.", b->name);
+                  continue;
+               }
+
+               /* execute their pcrs_joblist on the header. */
+               for (job = joblist; NULL != job; job = job->next)
+               {
+                  const int hits = pcrs_execute(job, tag, size, &modified_tag, &size);
+
+                  if (0 < hits)
+                  {
+                     /* Success, continue with the modified version. */
+                     if (tag != header)
+                     {
+                        /* Drop the intermediate result of an earlier job. */
+                        freez(tag);
+                     }
+                     tag = modified_tag;
+                  }
+                  else
+                  {
+                     /* Tagger doesn't match */
+                     if (0 > hits)
+                     {
+                        /*
+                         * Regex failure, log it but continue anyway.
+                         * The header is passed as string pointer,
+                         * which is what the %s conversion expects.
+                         */
+                        log_error(LOG_LEVEL_ERROR,
+                           "Problems with tagger \'%s\' and header \'%s\': %s",
+                           b->name, header, pcrs_strerror(hits));
+                     }
+                     freez(modified_tag);
+                  }
+               }
+
+               /* Dynamic job lists were compiled above just for this run. */
+               if (b->dynamic) pcrs_free_joblist(joblist);
+
+               /* If this tagger matched */
+               if (tag != header)
+               {
+                  if (0 == size)
+                  {
+                     /*
+                      * There is no technical limitation which makes
+                      * it impossible to use empty tags, but I assume
+                      * no one would do it intentionally.
+                      */
+                     freez(tag);
+                     log_error(LOG_LEVEL_INFO,
+                        "Tagger \'%s\' created an empty tag. Ignored.",
+                        b->name);
+                     continue;
+                  }
+
+                  if (!list_contains_item(csp->tags, tag))
+                  {
+                     if (JB_ERR_OK != enlist(csp->tags, tag))
+                     {
+                        log_error(LOG_LEVEL_ERROR,
+                           "Insufficient memory to add tag \'%s\', "
+                           "based on tagger \'%s\' and header \'%s\'",
+                           tag, b->name, header);
+                     }
+                     else
+                     {
+                        char *action_message;
+                        /*
+                         * update the action bits right away, to make
+                         * tagging based on tags set by earlier taggers
+                         * of the same kind possible.
+                         */
+                        if (update_action_bits_for_tag(csp, tag))
+                        {
+                           action_message = "Action bits updated accordingly.";
+                        }
+                        else
+                        {
+                           action_message = "No action bits update necessary.";
+                        }
+
+                        log_error(LOG_LEVEL_HEADER,
+                           "Tagger \'%s\' added tag \'%s\'. %s",
+                           b->name, tag, action_message);
+                     }
+                  }
+                  else
+                  {
+                     /* XXX: Is this log-worthy? */
+                     log_error(LOG_LEVEL_HEADER,
+                        "Tagger \'%s\' didn't add tag \'%s\'. "
+                        "Tag already present", b->name, tag);
+                  }
+                  freez(tag);
+               } /* if the tagger matched */
+            } /* if the tagger applies */
+         } /* for every tagger that could apply */
+      } /* for all filters */
+   } /* for all filter files */
+
+   return JB_ERR_OK;
+}
+
+/* here begins the family of parser functions that reformat header lines */
+
+/*********************************************************************