X-Git-Url: http://www.privoxy.org/gitweb/?p=privoxy.git;a=blobdiff_plain;f=pcrs.c;h=007f7cc104d9b9cb40648434f938a597df30508c;hp=6da82255508c4d9a4b46f660e151712b673ed9d3;hb=HEAD;hpb=100f48795eee7d2996bff48871bf745af01811c7 diff --git a/pcrs.c b/pcrs.c index 6da82255..eaeea625 100644 --- a/pcrs.c +++ b/pcrs.c @@ -57,7 +57,7 @@ * Internal prototypes */ -static int pcrs_parse_perl_options(const char *optstring, int *flags); +static int pcrs_parse_perl_options(const char *optstring, unsigned int *flags); static pcrs_substitute *pcrs_compile_replacement(const char *replacement, int trivialflag, int capturecount, int *errptr); static int is_hex_sequence(const char *sequence); @@ -83,25 +83,25 @@ const char *pcrs_strerror(const int error) switch (error) { /* Passed-through PCRE error: */ - case PCRE_ERROR_NOMEMORY: return "(pcre:) No memory"; + case PCREn(ERROR_NOMEMORY): return "(pcre:) No memory"; /* Shouldn't happen unless PCRE or PCRS bug, or user messed with compiled job: */ - case PCRE_ERROR_NULL: return "(pcre:) NULL code or subject or ovector"; - case PCRE_ERROR_BADOPTION: return "(pcre:) Unrecognized option bit"; - case PCRE_ERROR_BADMAGIC: return "(pcre:) Bad magic number in code"; + case PCREn(ERROR_NULL): return "(pcre:) NULL code or subject or ovector"; + case PCREn(ERROR_BADOPTION): return "(pcre:) Unrecognized option bit"; + case PCREn(ERROR_BADMAGIC): return "(pcre:) Bad magic number in code"; +#if defined(PCRE_ERROR_UNKNOWN_NODE) case PCRE_ERROR_UNKNOWN_NODE: return "(pcre:) Bad node in pattern"; - +#endif /* Can't happen / not passed: */ - case PCRE_ERROR_NOSUBSTRING: return "(pcre:) Fire in power supply"; - case PCRE_ERROR_NOMATCH: return "(pcre:) Water in power supply"; + case PCREn(ERROR_NOSUBSTRING): return "(pcre:) Fire in power supply"; + case PCREn(ERROR_NOMATCH): return "(pcre:) Water in power supply"; #ifdef PCRE_ERROR_MATCHLIMIT /* * Only reported by PCRE versions newer than our own. */ - case PCRE_ERROR_MATCHLIMIT: return "(pcre:) Match limit reached"; + case PCREn(ERROR_MATCHLIMIT): return "(pcre:) Match limit reached"; #endif /* def PCRE_ERROR_MATCHLIMIT */ - /* PCRS errors: */ case PCRS_ERR_NOMEM: return "(pcrs:) No memory"; case PCRS_ERR_CMDSYNTAX: return "(pcrs:) Syntax error while parsing command"; @@ -111,16 +111,14 @@ const char *pcrs_strerror(const int error) case PCRS_WARN_TRUNCATION: return "(pcrs:) At least one variable was too big and has been truncated before compilation"; - /* - * XXX: With the exception of PCRE_ERROR_MATCHLIMIT we - * only catch PCRE errors that can happen with our internal - * version. If Privoxy is linked against a newer - * PCRE version all bets are off ... - */ default: +#ifdef HAVE_PCRE2 + pcre2_get_error_message(error, (PCRE2_UCHAR8*)buf, sizeof(buf)); +#else snprintf(buf, sizeof(buf), "Error code %d. For details, check the pcre documentation.", error); +#endif return buf; } } @@ -149,7 +147,7 @@ const char *pcrs_strerror(const int error) * Returns : option integer suitable for pcre * *********************************************************************/ -static int pcrs_parse_perl_options(const char *optstring, int *flags) +static int pcrs_parse_perl_options(const char *optstring, unsigned int *flags) { size_t i; int rc = 0; @@ -163,12 +161,13 @@ static int pcrs_parse_perl_options(const char *optstring, int *flags) { case 'e': break; /* ToDo ;-) */ case 'g': *flags |= PCRS_GLOBAL; break; - case 'i': rc |= PCRE_CASELESS; break; - case 'm': rc |= PCRE_MULTILINE; break; + case 'i': rc |= PCREn(CASELESS); break; + case 'm': rc |= PCREn(MULTILINE); break; case 'o': break; - case 's': rc |= PCRE_DOTALL; break; - case 'x': rc |= PCRE_EXTENDED; break; - case 'U': rc |= PCRE_UNGREEDY; break; + case 's': rc |= PCREn(DOTALL); break; + case 'x': rc |= PCREn(EXTENDED); break; + case 'D': *flags |= PCRS_DYNAMIC; break; + case 'U': rc |= PCREn(UNGREEDY); break; case 'T': *flags |= PCRS_TRIVIAL; break; default: break; } @@ -280,7 +279,7 @@ static pcrs_substitute *pcrs_compile_replacement(const char *replacement, int tr */ if (trivialflag) { - text = strncpy(text, replacement, length + 1); + strlcpy(text, replacement, length + 1); k = (int)length; } @@ -470,8 +469,24 @@ pcrs_job *pcrs_free_job(pcrs_job *job) else { next = job->next; - if (job->pattern != NULL) free(job->pattern); - if (job->hints != NULL) free(job->hints); + if (job->pattern != NULL) + { +#ifdef HAVE_PCRE2 + pcre2_code_free(job->pattern); +#else + free(job->pattern); +#endif + } +#ifndef HAVE_PCRE2 + if (job->hints != NULL) + { +#ifdef PCRE_CONFIG_JIT + pcre_free_study(job->hints); +#else + free(job->hints); +#endif + } +#endif if (job->substitute != NULL) { if (job->substitute->text != NULL) free(job->substitute->text); @@ -618,9 +633,14 @@ pcrs_job *pcrs_compile_command(const char *command, int *errptr) pcrs_job *pcrs_compile(const char *pattern, const char *substitute, const char *options, int *errptr) { pcrs_job *newjob; - int flags; + unsigned int flags; int capturecount; +#ifdef HAVE_PCRE2 + int ret; +#else + int pcre_study_options = 0; const char *error; +#endif *errptr = 0; @@ -652,32 +672,65 @@ pcrs_job *pcrs_compile(const char *pattern, const char *substitute, const char * /* * Compile the pattern */ +#ifdef HAVE_PCRE2 + PCRE2_SIZE error_offset; + newjob->pattern = pcre2_compile((const unsigned char *)pattern, + PCRE2_ZERO_TERMINATED, (unsigned)newjob->options, errptr, + &error_offset, NULL); +#else newjob->pattern = pcre_compile(pattern, newjob->options, &error, errptr, NULL); +#endif if (newjob->pattern == NULL) { pcrs_free_job(newjob); return NULL; } +#if defined(PCRE_STUDY_JIT_COMPILE) || defined(HAVE_PCRE2) +#ifdef DISABLE_PCRE_JIT_COMPILATION +#warning PCRE_STUDY_JIT_COMPILE is supported but Privoxy has been configured not to use it +#else + if (!(flags & PCRS_DYNAMIC)) + { +#ifdef HAVE_PCRE2 + /* Try to enable JIT compilation but continue if it's unsupported. */ + if ((ret = pcre2_jit_compile(newjob->pattern, PCRE2_JIT_COMPLETE)) && + (ret != PCRE2_ERROR_JIT_BADOPTION)) + { + *errptr = ret; + pcrs_free_job(newjob); + return NULL; + } +#else + pcre_study_options = PCRE_STUDY_JIT_COMPILE; +#endif + } +#endif +#endif +#ifndef HAVE_PCRE2 /* * Generate hints. This has little overhead, since the * hints will be NULL for a boring pattern anyway. */ - newjob->hints = pcre_study(newjob->pattern, 0, &error); + newjob->hints = pcre_study(newjob->pattern, pcre_study_options, &error); if (error != NULL) { *errptr = PCRS_ERR_STUDY; pcrs_free_job(newjob); return NULL; } - +#endif /* * Determine the number of capturing subpatterns. * This is needed for handling $+ in the substitute. */ +#ifdef HAVE_PCRE2 + if (0 > (*errptr = pcre2_pattern_info(newjob->pattern, PCRE2_INFO_CAPTURECOUNT, &capturecount))) +#else if (0 > (*errptr = pcre_fullinfo(newjob->pattern, newjob->hints, PCRE_INFO_CAPTURECOUNT, &capturecount))) +#endif { pcrs_free_job(newjob); return NULL; @@ -789,14 +842,20 @@ int pcrs_execute_list(pcrs_job *joblist, char *subject, size_t subject_length, c *********************************************************************/ int pcrs_execute(pcrs_job *job, const char *subject, size_t subject_length, char **result, size_t *result_length) { - int offsets[3 * PCRS_MAX_SUBMATCHES], - offset, + int offset, i, k, matches_found, submatches, max_matches = PCRS_MAX_MATCH_INIT; size_t newsize; +#ifdef HAVE_PCRE2 + pcrs_match *matches, *dummy; + pcre2_match_data *pcre2_matches; + size_t *offsets; +#else pcrs_match *matches, *dummy; + int offsets[3 * PCRS_MAX_SUBMATCHES]; +#endif char *result_offset; offset = i = 0; @@ -810,27 +869,38 @@ int pcrs_execute(pcrs_job *job, const char *subject, size_t subject_length, char return(PCRS_ERR_BADJOB); } +#ifdef HAVE_PCRE2 + if (NULL == (pcre2_matches = pcre2_match_data_create_from_pattern(job->pattern, NULL))) + { + return(PCRS_ERR_NOMEM); + } + offsets = pcre2_get_ovector_pointer(pcre2_matches); +#endif if (NULL == (matches = (pcrs_match *)malloc((size_t)max_matches * sizeof(pcrs_match)))) { return(PCRS_ERR_NOMEM); } memset(matches, '\0', (size_t)max_matches * sizeof(pcrs_match)); - /* * Find the pattern and calculate the space * requirements for the result */ newsize = subject_length; +#ifdef HAVE_PCRE2 + while ((submatches = pcre2_match(job->pattern, (const unsigned char *)subject, + subject_length, (size_t)offset, 0, pcre2_matches, NULL)) > 0) +#else while ((submatches = pcre_exec(job->pattern, job->hints, subject, (int)subject_length, offset, 0, offsets, 3 * PCRS_MAX_SUBMATCHES)) > 0) +#endif { job->flags |= PCRS_SUCCESS; matches[i].submatches = submatches; for (k = 0; k < submatches; k++) { - matches[i].submatch_offset[k] = offsets[2 * k]; + matches[i].submatch_offset[k] = (int)offsets[2 * k]; /* Note: Non-found optional submatches have length -1-(-1)==0 */ matches[i].submatch_length[k] = (size_t)(offsets[2 * k + 1] - offsets[2 * k]); @@ -847,7 +917,7 @@ int pcrs_execute(pcrs_job *job, const char *subject, size_t subject_length, char newsize += (size_t)offsets[0] * (size_t)job->substitute->backref_count[PCRS_MAX_SUBMATCHES]; /* chunk after match */ - matches[i].submatch_offset[PCRS_MAX_SUBMATCHES + 1] = offsets[1]; + matches[i].submatch_offset[PCRS_MAX_SUBMATCHES + 1] = (int)offsets[1]; matches[i].submatch_length[PCRS_MAX_SUBMATCHES + 1] = subject_length - (size_t)offsets[1] - 1; newsize += (subject_length - (size_t)offsets[1]) * (size_t)job->substitute->backref_count[PCRS_MAX_SUBMATCHES + 1]; @@ -874,12 +944,19 @@ int pcrs_execute(pcrs_job *job, const char *subject, size_t subject_length, char break; /* Go find the next one */ else - offset = offsets[1]; + offset = (int)offsets[1]; } /* Pass pcre error through if (bad) failure */ +#ifdef HAVE_PCRE2 + if (submatches < PCRE2_ERROR_NOMATCH) +#else if (submatches < PCRE_ERROR_NOMATCH) +#endif { free(matches); +#ifdef HAVE_PCRE2 + pcre2_match_data_free(pcre2_matches); +#endif return submatches; } matches_found = i; @@ -889,9 +966,19 @@ int pcrs_execute(pcrs_job *job, const char *subject, size_t subject_length, char * Get memory for the result (must be freed by caller!) * and append terminating null byte. */ - if ((*result = (char *)malloc(newsize + 1)) == NULL) + if ((*result = (char *)malloc(newsize + 1 +#ifdef HAVE_PCRE2 + /* + * Work around to prevent invalid reads in the jit code. + */ + + 16 +#endif + )) == NULL) { free(matches); +#ifdef HAVE_PCRE2 + pcre2_match_data_free(pcre2_matches); +#endif return PCRS_ERR_NOMEM; } else @@ -944,6 +1031,9 @@ int pcrs_execute(pcrs_job *job, const char *subject, size_t subject_length, char memcpy(result_offset, subject + offset, subject_length - (size_t)offset); *result_length = newsize; +#ifdef HAVE_PCRE2 + pcre2_match_data_free(pcre2_matches); +#endif free(matches); return matches_found; @@ -995,7 +1085,7 @@ static int is_hex_sequence(const char *sequence) * FALSE * *********************************************************************/ -int pcrs_job_is_dynamic (char *job) +int pcrs_job_is_dynamic(char *job) { const char delimiter = job[1]; const size_t length = strlen(job); @@ -1081,7 +1171,7 @@ char pcrs_get_delimiter(const char *string) *********************************************************************/ char *pcrs_execute_single_command(const char *subject, const char *pcrs_command, int *hits) { - size_t size; + size_t buffer_size, new_size; char *result = NULL; pcrs_job *job; @@ -1089,12 +1179,12 @@ char *pcrs_execute_single_command(const char *subject, const char *pcrs_command, assert(pcrs_command); *hits = 0; - size = strlen(subject); + buffer_size = strlen(subject); job = pcrs_compile_command(pcrs_command, hits); if (NULL != job) { - *hits = pcrs_execute(job, subject, size, &result, &size); + *hits = pcrs_execute(job, subject, buffer_size, &result, &new_size); if (*hits < 0) { freez(result); @@ -1106,7 +1196,6 @@ char *pcrs_execute_single_command(const char *subject, const char *pcrs_command, } -static const char warning[] = "... [too long, truncated]"; /********************************************************************* * * Function : pcrs_compile_dynamic_command @@ -1163,7 +1252,7 @@ pcrs_job *pcrs_compile_dynamic_command(char *pcrs_command, const struct pcrs_var */ assert(NULL == strchr(v->name, d)); - ret = snprintf(buf, sizeof(buf), "s%c\\$%s%c%s%cgT", d, v->name, d, v->value, d); + ret = snprintf(buf, sizeof(buf), "s%c\\$%s%c%s%cDgT", d, v->name, d, v->value, d); assert(ret >= 0); if (ret >= sizeof(buf)) { @@ -1173,10 +1262,11 @@ pcrs_job *pcrs_compile_dynamic_command(char *pcrs_command, const struct pcrs_var * with a truncation message and close the pattern * properly. */ - const size_t trailer_size = sizeof(warning) + 3; /* 3 for d + "gT" */ + static const char warning[] = "... [too long, truncated]"; + const size_t trailer_size = sizeof(warning) + 4; /* 4 for d + "DgT" */ char *trailer_start = buf + sizeof(buf) - trailer_size; - ret = snprintf(trailer_start, trailer_size, "%s%cgT", warning, d); + ret = snprintf(trailer_start, trailer_size, "%s%cDgT", warning, d); assert(ret == trailer_size - 1); assert(sizeof(buf) == strlen(buf) + 1); truncation = 1;