openwrt-packages/net/privoxy/patches/100-Add-pcre2-support.patch

1015 lines
33 KiB
Diff

From 53748ca8ca3c893025be34dd4f104546fcbd0602 Mon Sep 17 00:00:00 2001
From: Fabian Keil <fk@fabiankeil.de>
Date: Sat, 17 Jun 2023 13:20:24 +0200
Subject: [PATCH] Add pcre2 support
This is currently expected to cause crashes on Windows
when compiled with GUI support.
Closes bug #935.
Initial patch submitted by: Gagan Sidhu
---
acconfig.h | 6 ++
actions.c | 8 +++
cgi.c | 9 ++-
client-tags.c | 5 ++
configure.in | 64 ++++++++++++++++-
pcrs.c | 148 +++++++++++++++++++++++++++++----------
pcrs.h | 40 +++++++----
project.h | 54 +++++++++-----
templates/show-status | 5 +-
urlmatch.c | 159 ++++++++++++++++++++++++++++++++++++++++++
urlmatch.h | 4 ++
w32log.c | 3 +
12 files changed, 430 insertions(+), 75 deletions(-)
--- a/acconfig.h
+++ b/acconfig.h
@@ -225,11 +225,17 @@
/* Define if pcre.h must be included as <pcre/pcre.h>
*/
#undef PCRE_H_IN_SUBDIR
+#undef PCRE2_H_IN_SUBDIR
+
+#undef HAVE_PCRE2
+#undef HAVE_PCRE2POSIX
/* Define if pcreposix.h must be included as <pcre/pcreposix.h>
*/
#undef PCREPOSIX_H_IN_SUBDIR
+#undef PCRE2POSIX_H_IN_SUBDIR
+
@BOTTOM@
/*
--- a/actions.c
+++ b/actions.c
@@ -828,8 +828,12 @@ int update_action_bits_for_tag(struct cl
continue;
}
+#ifdef HAVE_PCRE2
+ if (pcre2_pattern_matches(b->url->pattern.tag_regex, tag))
+#else
/* and check if one of the tag patterns matches the tag, */
if (0 == regexec(b->url->pattern.tag_regex, tag, 0, NULL, 0))
+#endif
{
/* if it does, update the action bit map, */
if (merge_current_action(csp->action, b->action))
@@ -884,7 +888,11 @@ jb_err check_negative_tag_patterns(struc
}
for (tag = csp->tags->first; NULL != tag; tag = tag->next)
{
+#ifdef HAVE_PCRE2
+ if (pcre2_pattern_matches(b->url->pattern.tag_regex, tag->str))
+#else
if (0 == regexec(b->url->pattern.tag_regex, tag->str, 0, NULL, 0))
+#endif
{
/*
* The pattern matches at least one tag, thus the action
--- a/cgi.c
+++ b/cgi.c
@@ -2023,7 +2023,7 @@ jb_err template_fill(char **template_ptr
char buf[BUFFER_SIZE];
char *tmp_out_buffer;
char *file_buffer;
- size_t size;
+ size_t buffer_size, new_size;
int error;
const char *flags;
@@ -2032,7 +2032,7 @@ jb_err template_fill(char **template_ptr
assert(exports);
file_buffer = *template_ptr;
- size = strlen(file_buffer) + 1;
+ buffer_size = strlen(file_buffer) + 1;
/*
* Assemble pcrs joblist from exports map
@@ -2082,7 +2082,10 @@ jb_err template_fill(char **template_ptr
}
else
{
- error = pcrs_execute(job, file_buffer, size, &tmp_out_buffer, &size);
+ error = pcrs_execute(job, file_buffer, buffer_size, &tmp_out_buffer,
+ &new_size);
+
+ buffer_size = new_size;
pcrs_free_job(job);
if (NULL == tmp_out_buffer)
--- a/client-tags.c
+++ b/client-tags.c
@@ -43,6 +43,7 @@
#include "miscutil.h"
#include "errlog.h"
#include "parsers.h"
+#include "urlmatch.h"
struct client_specific_tag
{
@@ -658,7 +659,11 @@ int client_tag_match(const struct patter
for (tag = tags->first; tag != NULL; tag = tag->next)
{
+#ifdef HAVE_PCRE2
+ if (pcre2_pattern_matches(pattern->pattern.tag_regex, tag->str))
+#else
if (0 == regexec(pattern->pattern.tag_regex, tag->str, 0, NULL, 0))
+#endif
{
log_error(LOG_LEVEL_TAGGING, "Client tag '%s' matches.", tag->str);
return 1;
--- a/configure.in
+++ b/configure.in
@@ -863,12 +863,47 @@ else
])
fi
+AC_ARG_ENABLE(pcre2,
+[ --disable-pcre2 Don't try to use pcre2 even if it's available],
+[enableval2=$enableval],
+[enableval2=yes])
+if test "$enableval2" = yes; then
+  try_pcre2=yes
+else
+  AC_MSG_WARN([Ignoring pcre2 even if it's available])
+  try_pcre2=no; have_pcre2=no; have_pcre2posix=no # still read by later tests
+fi
+
+if test $try_pcre2 != no; then
dnl =================================================================
dnl Checks for libraries.
dnl =================================================================
dnl Note: Some systems may have the library but not the system header
dnl file, so we must check for both.
dnl Also check for correct version
+AC_CHECK_LIB(pcre2-8, pcre2_compile_8, [
+   AC_CHECK_HEADER(pcre2.h, [
+      AC_EGREP_HEADER(pcre2_pattern_info, pcre2.h,[have_pcre2=yes; AC_DEFINE(HAVE_PCRE2)], [AC_MSG_WARN([[pcre2 old version installed]]); have_pcre2=no])
+   ], [
+      AC_CHECK_HEADER(pcre2/pcre2.h, [
+         AC_EGREP_HEADER(pcre2_pattern_info, pcre2/pcre2.h, [have_pcre2=yes; AC_DEFINE(HAVE_PCRE2) AC_DEFINE(PCRE2_H_IN_SUBDIR)], [AC_MSG_WARN([[pcre2 old version installed]]); have_pcre2=no])
+      ], [have_pcre2=no])
+   ], [#define PCRE2_CODE_UNIT_WIDTH 8])
+], [have_pcre2=no])
+dnl The posix wrapper header is looked for in the same two locations.
+AC_CHECK_LIB(pcre2-posix, regcomp, [
+   AC_CHECK_HEADER(pcre2posix.h, [
+      AC_EGREP_HEADER(pcre2_regerror, pcre2posix.h, [have_pcre2posix=yes],[AC_MSG_WARN([[pcre2posix old version installed]]); have_pcre2posix=no])
+   ], [
+      AC_CHECK_HEADER(pcre2/pcre2posix.h, [
+         AC_EGREP_HEADER(pcre2_regerror, pcre2/pcre2posix.h, [have_pcre2posix=yes; AC_DEFINE(PCRE2POSIX_H_IN_SUBDIR)],[AC_MSG_WARN([[pcre2posix old version installed]]); have_pcre2posix=no])
+      ], [have_pcre2posix=no])
+   ])
+], [have_pcre2posix=no], -lpcre2-8)
+fi
+
+if test $have_pcre2 = "no"; then
+
AC_CHECK_LIB(pcre, pcre_compile, [
AC_CHECK_HEADER(pcre.h, [
AC_EGREP_HEADER(pcre_fullinfo, pcre.h, [have_pcre=yes], [AC_MSG_WARN([[pcre old version installed]]); have_pcre=no])
@@ -889,6 +924,7 @@ AC_CHECK_LIB(pcreposix, regcomp, [
])
], [have_pcreposix=no], -lpcre)
+fi
dnl ================================================================
dnl libpcrs is temporarily disabled.
dnl
@@ -1095,6 +1131,31 @@ fi
# we don't need pcreposix, then link pcre dynamically; else
# build it and link statically
#
+
+#check for libpcre2 first. then regular pcre
+
+if test $have_pcre2 = "yes"; then
+ echo "using libpcre2"
+ STATIC_PCRE_ONLY=#
+ LIBS="$LIBS -lpcre2-8 -lpcre2-posix"
+ if test "$use_static_pcre" = "yes"; then
+ pcre_dyn=no
+ AC_DEFINE(PCRE_STATIC,1,[Define to statically link to pcre library on Windows.])
+# see /usr/i686-w64-mingw32/sys-root/mingw/include/pcre.h line 54
+# #if defined(_WIN32) && !defined(PCRE_STATIC)
+# # ifndef PCRE_EXP_DECL
+# # define PCRE_EXP_DECL extern __declspec(dllimport)
+# # endif
+# If you want to statically link a program against a PCRE library in the form of
+# a non-dll .a file, you must define PCRE_STATIC before including pcre.h or
+# pcrecpp.h, otherwise the pcre_malloc() and pcre_free() exported functions will
+# be declared __declspec(dllimport), with unwanted results.
+ else
+ pcre_dyn=yes
+ AC_DEFINE(FEATURE_DYNAMIC_PCRE,1,[Define to dynamically link to pcre.])
+ fi
+else
+
if test $have_pcre = "yes"; then
echo "using libpcre"
STATIC_PCRE_ONLY=#
@@ -1116,7 +1177,8 @@ if test $have_pcre = "yes"; then
AC_DEFINE(FEATURE_DYNAMIC_PCRE,1,[Define to dynamically link to pcre.])
fi
else
- AC_MSG_ERROR(pcre library not detected.)
+ AC_MSG_ERROR(Detected neither pcre2 nor pcre library.)
+fi
fi
AC_DEFINE(FEATURE_CONNECTION_KEEP_ALIVE)
--- a/pcrs.c
+++ b/pcrs.c
@@ -57,7 +57,7 @@
* Internal prototypes
*/
-static int pcrs_parse_perl_options(const char *optstring, int *flags);
+static int pcrs_parse_perl_options(const char *optstring, unsigned int *flags);
static pcrs_substitute *pcrs_compile_replacement(const char *replacement, int trivialflag,
int capturecount, int *errptr);
static int is_hex_sequence(const char *sequence);
@@ -83,25 +83,25 @@ const char *pcrs_strerror(const int erro
switch (error)
{
/* Passed-through PCRE error: */
- case PCRE_ERROR_NOMEMORY: return "(pcre:) No memory";
+ case PCREn(ERROR_NOMEMORY): return "(pcre:) No memory";
/* Shouldn't happen unless PCRE or PCRS bug, or user messed with compiled job: */
- case PCRE_ERROR_NULL: return "(pcre:) NULL code or subject or ovector";
- case PCRE_ERROR_BADOPTION: return "(pcre:) Unrecognized option bit";
- case PCRE_ERROR_BADMAGIC: return "(pcre:) Bad magic number in code";
+ case PCREn(ERROR_NULL): return "(pcre:) NULL code or subject or ovector";
+ case PCREn(ERROR_BADOPTION): return "(pcre:) Unrecognized option bit";
+ case PCREn(ERROR_BADMAGIC): return "(pcre:) Bad magic number in code";
+#if defined(PCRE_ERROR_UNKNOWN_NODE)
case PCRE_ERROR_UNKNOWN_NODE: return "(pcre:) Bad node in pattern";
-
+#endif
/* Can't happen / not passed: */
- case PCRE_ERROR_NOSUBSTRING: return "(pcre:) Fire in power supply";
- case PCRE_ERROR_NOMATCH: return "(pcre:) Water in power supply";
+ case PCREn(ERROR_NOSUBSTRING): return "(pcre:) Fire in power supply";
+ case PCREn(ERROR_NOMATCH): return "(pcre:) Water in power supply";
#ifdef PCRE_ERROR_MATCHLIMIT
/*
* Only reported by PCRE versions newer than our own.
*/
- case PCRE_ERROR_MATCHLIMIT: return "(pcre:) Match limit reached";
+ case PCREn(ERROR_MATCHLIMIT): return "(pcre:) Match limit reached";
#endif /* def PCRE_ERROR_MATCHLIMIT */
-
/* PCRS errors: */
case PCRS_ERR_NOMEM: return "(pcrs:) No memory";
case PCRS_ERR_CMDSYNTAX: return "(pcrs:) Syntax error while parsing command";
@@ -111,16 +111,14 @@ const char *pcrs_strerror(const int erro
case PCRS_WARN_TRUNCATION:
return "(pcrs:) At least one variable was too big and has been truncated before compilation";
- /*
- * XXX: With the exception of PCRE_ERROR_MATCHLIMIT we
- * only catch PCRE errors that can happen with our internal
- * version. If Privoxy is linked against a newer
- * PCRE version all bets are off ...
- */
default:
+#ifdef HAVE_PCRE2
+ pcre2_get_error_message(error, (PCRE2_UCHAR8*)buf, sizeof(buf));
+#else
snprintf(buf, sizeof(buf),
"Error code %d. For details, check the pcre documentation.",
error);
+#endif
return buf;
}
}
@@ -149,7 +147,7 @@ const char *pcrs_strerror(const int erro
* Returns : option integer suitable for pcre
*
*********************************************************************/
-static int pcrs_parse_perl_options(const char *optstring, int *flags)
+static int pcrs_parse_perl_options(const char *optstring, unsigned int *flags)
{
size_t i;
int rc = 0;
@@ -163,13 +161,13 @@ static int pcrs_parse_perl_options(const
{
case 'e': break; /* ToDo ;-) */
case 'g': *flags |= PCRS_GLOBAL; break;
- case 'i': rc |= PCRE_CASELESS; break;
- case 'm': rc |= PCRE_MULTILINE; break;
+ case 'i': rc |= PCREn(CASELESS); break;
+ case 'm': rc |= PCREn(MULTILINE); break;
case 'o': break;
- case 's': rc |= PCRE_DOTALL; break;
- case 'x': rc |= PCRE_EXTENDED; break;
+ case 's': rc |= PCREn(DOTALL); break;
+ case 'x': rc |= PCREn(EXTENDED); break;
case 'D': *flags |= PCRS_DYNAMIC; break;
- case 'U': rc |= PCRE_UNGREEDY; break;
+ case 'U': rc |= PCREn(UNGREEDY); break;
case 'T': *flags |= PCRS_TRIVIAL; break;
default: break;
}
@@ -471,7 +469,15 @@ pcrs_job *pcrs_free_job(pcrs_job *job)
else
{
next = job->next;
- if (job->pattern != NULL) free(job->pattern);
+ if (job->pattern != NULL)
+ {
+#ifdef HAVE_PCRE2
+ pcre2_code_free(job->pattern);
+#else
+ free(job->pattern);
+#endif
+ }
+#ifndef HAVE_PCRE2
if (job->hints != NULL)
{
#ifdef PCRE_CONFIG_JIT
@@ -480,6 +486,7 @@ pcrs_job *pcrs_free_job(pcrs_job *job)
free(job->hints);
#endif
}
+#endif
if (job->substitute != NULL)
{
if (job->substitute->text != NULL) free(job->substitute->text);
@@ -626,10 +633,14 @@ pcrs_job *pcrs_compile_command(const cha
pcrs_job *pcrs_compile(const char *pattern, const char *substitute, const char *options, int *errptr)
{
pcrs_job *newjob;
- int flags;
+ unsigned int flags;
int capturecount;
- const char *error;
+#ifdef HAVE_PCRE2
+ int ret;
+#else
int pcre_study_options = 0;
+ const char *error;
+#endif
*errptr = 0;
@@ -661,25 +672,43 @@ pcrs_job *pcrs_compile(const char *patte
/*
* Compile the pattern
*/
+#ifdef HAVE_PCRE2
+ PCRE2_SIZE error_offset;
+ newjob->pattern = pcre2_compile((const unsigned char *)pattern,
+ PCRE2_ZERO_TERMINATED, (unsigned)newjob->options, errptr,
+ &error_offset, NULL);
+#else
newjob->pattern = pcre_compile(pattern, newjob->options, &error, errptr, NULL);
+#endif
if (newjob->pattern == NULL)
{
pcrs_free_job(newjob);
return NULL;
}
-
-#ifdef PCRE_STUDY_JIT_COMPILE
+#if defined(PCRE_STUDY_JIT_COMPILE) || defined(HAVE_PCRE2)
#ifdef DISABLE_PCRE_JIT_COMPILATION
#warning PCRE_STUDY_JIT_COMPILE is supported but Privoxy has been configured not to use it
#else
if (!(flags & PCRS_DYNAMIC))
{
+#ifdef HAVE_PCRE2
+ /* Try to enable JIT compilation but continue if it's unsupported. */
+ if ((ret = pcre2_jit_compile(newjob->pattern, PCRE2_JIT_COMPLETE)) &&
+ (ret != PCRE2_ERROR_JIT_BADOPTION))
+ {
+ *errptr = ret;
+ pcrs_free_job(newjob);
+ return NULL;
+ }
+#else
pcre_study_options = PCRE_STUDY_JIT_COMPILE;
+#endif
}
#endif
#endif
+#ifndef HAVE_PCRE2
/*
* Generate hints. This has little overhead, since the
* hints will be NULL for a boring pattern anyway.
@@ -691,13 +720,17 @@ pcrs_job *pcrs_compile(const char *patte
pcrs_free_job(newjob);
return NULL;
}
-
+#endif
/*
* Determine the number of capturing subpatterns.
* This is needed for handling $+ in the substitute.
*/
+#ifdef HAVE_PCRE2
+ if (0 > (*errptr = pcre2_pattern_info(newjob->pattern, PCRE2_INFO_CAPTURECOUNT, &capturecount)))
+#else
if (0 > (*errptr = pcre_fullinfo(newjob->pattern, newjob->hints, PCRE_INFO_CAPTURECOUNT, &capturecount)))
+#endif
{
pcrs_free_job(newjob);
return NULL;
@@ -809,14 +842,20 @@ int pcrs_execute_list(pcrs_job *joblist,
*********************************************************************/
int pcrs_execute(pcrs_job *job, const char *subject, size_t subject_length, char **result, size_t *result_length)
{
- int offsets[3 * PCRS_MAX_SUBMATCHES],
- offset,
+ int offset,
i, k,
matches_found,
submatches,
max_matches = PCRS_MAX_MATCH_INIT;
size_t newsize;
+#ifdef HAVE_PCRE2
pcrs_match *matches, *dummy;
+ pcre2_match_data *pcre2_matches;
+ size_t *offsets;
+#else
+ pcrs_match *matches, *dummy;
+ int offsets[3 * PCRS_MAX_SUBMATCHES];
+#endif
char *result_offset;
offset = i = 0;
@@ -830,27 +869,38 @@ int pcrs_execute(pcrs_job *job, const ch
return(PCRS_ERR_BADJOB);
}
+#ifdef HAVE_PCRE2
+ if (NULL == (pcre2_matches = pcre2_match_data_create_from_pattern(job->pattern, NULL)))
+ {
+ return(PCRS_ERR_NOMEM);
+ }
+ offsets = pcre2_get_ovector_pointer(pcre2_matches);
+#endif
if (NULL == (matches = (pcrs_match *)malloc((size_t)max_matches * sizeof(pcrs_match))))
{
return(PCRS_ERR_NOMEM);
}
memset(matches, '\0', (size_t)max_matches * sizeof(pcrs_match));
-
/*
* Find the pattern and calculate the space
* requirements for the result
*/
newsize = subject_length;
+#ifdef HAVE_PCRE2
+ while ((submatches = pcre2_match(job->pattern, (const unsigned char *)subject,
+ subject_length, (size_t)offset, 0, pcre2_matches, NULL)) > 0)
+#else
while ((submatches = pcre_exec(job->pattern, job->hints, subject, (int)subject_length, offset, 0, offsets, 3 * PCRS_MAX_SUBMATCHES)) > 0)
+#endif
{
job->flags |= PCRS_SUCCESS;
matches[i].submatches = submatches;
for (k = 0; k < submatches; k++)
{
- matches[i].submatch_offset[k] = offsets[2 * k];
+ matches[i].submatch_offset[k] = (int)offsets[2 * k];
/* Note: Non-found optional submatches have length -1-(-1)==0 */
matches[i].submatch_length[k] = (size_t)(offsets[2 * k + 1] - offsets[2 * k]);
@@ -867,7 +917,7 @@ int pcrs_execute(pcrs_job *job, const ch
newsize += (size_t)offsets[0] * (size_t)job->substitute->backref_count[PCRS_MAX_SUBMATCHES];
/* chunk after match */
- matches[i].submatch_offset[PCRS_MAX_SUBMATCHES + 1] = offsets[1];
+ matches[i].submatch_offset[PCRS_MAX_SUBMATCHES + 1] = (int)offsets[1];
matches[i].submatch_length[PCRS_MAX_SUBMATCHES + 1] = subject_length - (size_t)offsets[1] - 1;
newsize += (subject_length - (size_t)offsets[1]) * (size_t)job->substitute->backref_count[PCRS_MAX_SUBMATCHES + 1];
@@ -894,12 +944,19 @@ int pcrs_execute(pcrs_job *job, const ch
break;
/* Go find the next one */
else
- offset = offsets[1];
+ offset = (int)offsets[1];
}
/* Pass pcre error through if (bad) failure */
+#ifdef HAVE_PCRE2
+ if (submatches < PCRE2_ERROR_NOMATCH)
+#else
if (submatches < PCRE_ERROR_NOMATCH)
+#endif
{
free(matches);
+#ifdef HAVE_PCRE2
+ pcre2_match_data_free(pcre2_matches);
+#endif
return submatches;
}
matches_found = i;
@@ -909,9 +966,19 @@ int pcrs_execute(pcrs_job *job, const ch
* Get memory for the result (must be freed by caller!)
* and append terminating null byte.
*/
- if ((*result = (char *)malloc(newsize + 1)) == NULL)
+ if ((*result = (char *)malloc(newsize + 1
+#ifdef HAVE_PCRE2
+ /*
+ * Work around to prevent invalid reads in the jit code.
+ */
+ + 16
+#endif
+ )) == NULL)
{
free(matches);
+#ifdef HAVE_PCRE2
+ pcre2_match_data_free(pcre2_matches);
+#endif
return PCRS_ERR_NOMEM;
}
else
@@ -964,6 +1031,9 @@ int pcrs_execute(pcrs_job *job, const ch
memcpy(result_offset, subject + offset, subject_length - (size_t)offset);
*result_length = newsize;
+#ifdef HAVE_PCRE2
+ pcre2_match_data_free(pcre2_matches);
+#endif
free(matches);
return matches_found;
@@ -1101,7 +1171,7 @@ char pcrs_get_delimiter(const char *stri
*********************************************************************/
char *pcrs_execute_single_command(const char *subject, const char *pcrs_command, int *hits)
{
- size_t size;
+ size_t buffer_size, new_size;
char *result = NULL;
pcrs_job *job;
@@ -1109,12 +1179,14 @@ char *pcrs_execute_single_command(const
assert(pcrs_command);
*hits = 0;
- size = strlen(subject);
+ buffer_size = strlen(subject);
job = pcrs_compile_command(pcrs_command, hits);
if (NULL != job)
{
- *hits = pcrs_execute(job, subject, size, &result, &size);
+ *hits = pcrs_execute(job, subject, buffer_size, &result, &new_size);
+ buffer_size = new_size;
+
if (*hits < 0)
{
freez(result);
--- a/pcrs.h
+++ b/pcrs.h
@@ -33,9 +33,18 @@
*********************************************************************/
+#ifdef HAVE_PCRE2
+#define PCRE2_CODE_UNIT_WIDTH 8
+#define PCREn(x) PCRE2_ ## x
+#ifndef _PCRE2_H
+#include <pcre2.h>
+#endif
+#else
+#define PCREn(x) PCRE_ ## x
#ifndef _PCRE_H
#include <pcre.h>
#endif
+#endif
/*
* Constants:
@@ -55,22 +64,23 @@
* They are supposed to be handled together with PCRE error
* codes and have to start with an offset to prevent overlaps.
*
- * PCRE 6.7 uses error codes from -1 to -21, PCRS error codes
- * below -100 should be safe for a while.
+ * PCRE 6.7 uses error codes from -1 to -21,
+ * PCRE2 10.42 uses error codes from -66 to 101.
+ * PCRS error codes below -300 should be safe for a while.
*/
-#define PCRS_ERR_NOMEM -100 /* Failed to acquire memory. */
-#define PCRS_ERR_CMDSYNTAX -101 /* Syntax of s///-command */
-#define PCRS_ERR_STUDY -102 /* pcre error while studying the pattern */
-#define PCRS_ERR_BADJOB -103 /* NULL job pointer, pattern or substitute */
-#define PCRS_WARN_BADREF -104 /* Backreference out of range */
-#define PCRS_WARN_TRUNCATION -105 /* At least one pcrs variable was too big,
+#define PCRS_ERR_NOMEM -300 /* Failed to acquire memory. */
+#define PCRS_ERR_CMDSYNTAX -301 /* Syntax of s///-command */
+#define PCRS_ERR_STUDY -302 /* pcre error while studying the pattern */
+#define PCRS_ERR_BADJOB -303 /* NULL job pointer, pattern or substitute */
+#define PCRS_WARN_BADREF -304 /* Backreference out of range */
+#define PCRS_WARN_TRUNCATION -305 /* At least one pcrs variable was too big,
* only the first part was used. */
/* Flags */
-#define PCRS_GLOBAL 1 /* Job should be applied globally, as with perl's g option */
-#define PCRS_TRIVIAL 2 /* Backreferences in the substitute are ignored */
-#define PCRS_SUCCESS 4 /* Job did previously match */
-#define PCRS_DYNAMIC 8 /* Job is dynamic (used to disable JIT compilation) */
+#define PCRS_GLOBAL 0x08000000u /* Job should be applied globally, as with perl's g option */
+#define PCRS_TRIVIAL 0x10000000u /* Backreferences in the substitute are ignored */
+#define PCRS_SUCCESS 0x20000000u /* Job did previously match */
+#define PCRS_DYNAMIC 0x40000000u /* Job is dynamic (used to disable JIT compilation) */
/*
@@ -107,10 +117,14 @@ typedef struct {
/* A PCRS job */
typedef struct PCRS_JOB {
+#ifdef HAVE_PCRE2
+ pcre2_code *pattern;
+#else
pcre *pattern; /* The compiled pcre pattern */
pcre_extra *hints; /* The pcre hints for the pattern */
+#endif
int options; /* The pcre options (numeric) */
- int flags; /* The pcrs and user flags (see "Flags" above) */
+ unsigned int flags; /* The pcrs and user flags (see "Flags" above) */
pcrs_substitute *substitute; /* The compiled pcrs substitute */
struct PCRS_JOB *next; /* Pointer for chaining jobs to joblists */
} pcrs_job;
--- a/project.h
+++ b/project.h
@@ -94,12 +94,38 @@
*/
#ifdef STATIC_PCRE
+#ifdef HAVE_PCRE2
+# include "pcre2.h"
+# include "pcre2posix.h"
+#else
# include "pcre.h"
+# include "pcreposix.h"
+#endif
#else
-# ifdef PCRE_H_IN_SUBDIR
-# include <pcre/pcre.h>
+# ifdef HAVE_PCRE2
+# ifdef PCRE2_H_IN_SUBDIR
+# define PCRE2_CODE_UNIT_WIDTH 8
+# include <pcre2/pcre2.h>
+# else
+# define PCRE2_CODE_UNIT_WIDTH 8
+# include <pcre2.h>
+# endif
+# ifdef PCRE2POSIX_H_IN_SUBDIR
+# include <pcre2/pcre2posix.h>
+# else
+# include <pcre2posix.h>
+# endif
# else
-# include <pcre.h>
+# ifdef PCRE_H_IN_SUBDIR
+# include <pcre/pcre.h>
+# else
+# include <pcre.h>
+# endif
+# ifdef PCREPOSIX_H_IN_SUBDIR
+# include <pcre/pcreposix.h>
+# else
+# include <pcreposix.h>
+# endif
# endif
#endif
@@ -109,16 +135,6 @@
# include <pcrs.h>
#endif
-#ifdef STATIC_PCRE
-# include "pcreposix.h"
-#else
-# ifdef PCRE_H_IN_SUBDIR
-# include <pcre/pcreposix.h>
-# else
-# include <pcreposix.h>
-# endif
-#endif
-
#ifdef _WIN32
/*
* I don't want to have to #include all this just for the declaration
@@ -404,10 +420,16 @@ struct http_response
enum crunch_reason crunch_reason; /**< Why the response was generated in the first place. */
};
+#ifdef HAVE_PCRE2
+#define REGEX_TYPE pcre2_code
+#else
+#define REGEX_TYPE regex_t
+#endif
+
struct url_spec
{
#ifdef FEATURE_PCRE_HOST_PATTERNS
- regex_t *host_regex;/**< Regex for host matching */
+ REGEX_TYPE *host_regex;/**< Regex for host matching */
enum host_regex_type { VANILLA_HOST_PATTERN, PCRE_HOST_PATTERN } host_regex_type;
#endif /* defined FEATURE_PCRE_HOST_PATTERNS */
int dcount; /**< How many parts to this domain? (length of dvec) */
@@ -417,7 +439,7 @@ struct url_spec
char *port_list; /**< List of acceptable ports, or NULL to match all ports */
- regex_t *preg; /**< Regex for matching path part */
+ REGEX_TYPE *preg; /**< Regex for matching path part */
};
/**
@@ -432,7 +454,7 @@ struct pattern_spec
union
{
struct url_spec url_spec;
- regex_t *tag_regex;
+ REGEX_TYPE *tag_regex;
} pattern;
unsigned int flags; /**< Bitmap with various pattern properties. */
--- a/templates/show-status
+++ b/templates/show-status
@@ -298,10 +298,7 @@
<tr>
<td><code>FEATURE_DYNAMIC_PCRE</code></td>
<td>@if-FEATURE_DYNAMIC_PCRE-then@ Yes @else-not-FEATURE_DYNAMIC_PCRE@ No @endif-FEATURE_DYNAMIC_PCRE@</td>
- <td>Dynamically link to the PCRE library. This is set automatically
- by <code>./configure</code> if you do not have libpcre installed.
- Dynamically linking to an external libpcre is recommended as the one that is distributed
- with Privoxy itself is outdated and lacks various features and bug-fixes you may be interested in.</td>
+ <td>Dynamically link to the PCRE(2) library (recommended).</td>
</tr>
<tr>
<td><code>FEATURE_EXTENDED_STATISTICS</code></td>
--- a/urlmatch.c
+++ b/urlmatch.c
@@ -604,6 +604,100 @@ jb_err parse_http_request(const char *re
}
+#ifdef HAVE_PCRE2
+/*********************************************************************
+ *
+ * Function    :  compile_pattern
+ *
+ * Description :  Compiles an anchored host, domain or TAG pattern.
+ *
+ * Parameters  :
+ *          1  :  pattern = The pattern to compile.
+ *          2  :  anchoring = How the regex should be modified
+ *                before compilation. Can be either
+ *                one of NO_ANCHORING, LEFT_ANCHORED,
+ *                RIGHT_ANCHORED or RIGHT_ANCHORED_HOST.
+ *          3  :  url = Its spec member is logged on failure.
+ *                The structure itself is not freed here.
+ *          4  :  regex = Where the compiled regex should be stored.
+ *
+ * Returns     :  JB_ERR_OK - Success (*regex is NULL for an empty pattern)
+ *                JB_ERR_PARSE - Compiling or JIT-compiling failed
+ *
+ *********************************************************************/
+static jb_err compile_pattern(const char *pattern, enum regex_anchoring anchoring,
+                              struct pattern_spec *url, pcre2_code **regex)
+{
+   int errcode;
+   const char *fmt = NULL;
+   char *rebuf;
+   size_t rebuf_size;
+   PCRE2_SIZE error_offset;
+   int ret;
+
+   assert(pattern);
+   /* An empty pattern is not compiled at all. */
+   if (pattern[0] == '\0')
+   {
+      *regex = NULL;
+      return JB_ERR_OK;
+   }
+   /* Pick the format string that adds the requested anchors. */
+   switch (anchoring)
+   {
+      case NO_ANCHORING:
+         fmt = "%s";
+         break;
+      case RIGHT_ANCHORED:
+         fmt = "%s$";
+         break;
+      case RIGHT_ANCHORED_HOST:
+         fmt = "%s\\.?$";
+         break;
+      case LEFT_ANCHORED:
+         fmt = "^%s";
+         break;
+      default:
+         log_error(LOG_LEVEL_FATAL,
+            "Invalid anchoring in compile_pattern %d", anchoring);
+   }
+   rebuf_size = strlen(pattern) + strlen(fmt);
+   rebuf = malloc_or_die(rebuf_size);
+   /* The two-character "%s" in fmt is replaced, so the NUL byte fits. */
+   snprintf(rebuf, rebuf_size, fmt, pattern);
+   /* Compile the anchored copy, not the raw pattern. */
+   *regex = pcre2_compile((const unsigned char *)rebuf,
+      PCRE2_ZERO_TERMINATED, PCRE2_CASELESS, &errcode,
+      &error_offset, NULL);
+   if (*regex == NULL)
+   {
+      log_error(LOG_LEVEL_ERROR, "error compiling %s from %s: %s",
+         pattern, url->spec, rebuf);
+      freez(rebuf);
+
+      return JB_ERR_PARSE;
+   }
+
+#ifndef DISABLE_PCRE_JIT_COMPILATION
+   /* Try to enable JIT compilation but continue if it's unsupported. */
+   if ((ret = pcre2_jit_compile(*regex, PCRE2_JIT_COMPLETE)) &&
+       (ret != PCRE2_ERROR_JIT_BADOPTION))
+   {
+      log_error(LOG_LEVEL_ERROR,
+         "Unexpected error enabling JIT compilation for %s from %s: %s",
+         pattern, url->spec, rebuf);
+      freez(rebuf);
+      /* NOTE(review): *regex still holds the compiled code; confirm callers free it. */
+      return JB_ERR_PARSE;
+   }
+#endif
+
+   freez(rebuf);
+
+   return JB_ERR_OK;
+
+}
+#else
/*********************************************************************
*
* Function : compile_pattern
@@ -686,6 +780,7 @@ static jb_err compile_pattern(const char
return JB_ERR_OK;
}
+#endif
/*********************************************************************
@@ -1051,6 +1146,49 @@ static int simplematch(const char *patte
}
+#ifdef HAVE_PCRE2
+/*********************************************************************
+ *
+ * Function    :  pcre2_pattern_matches
+ *
+ * Description :  Runs a compiled pcre2 pattern against a string.
+ *
+ * Parameters  :
+ *          1  :  pattern = The compiled pattern, must not be NULL
+ *          2  :  string = The subject string, must not be NULL
+ *
+ * Returns     :  Non-zero if the pattern matches, FALSE otherwise.
+ *
+ *********************************************************************/
+int pcre2_pattern_matches(const pcre2_code *pattern, const char *string)
+{
+   pcre2_match_data *match_data;
+   const PCRE2_SIZE start_offset = 0;
+   size_t subject_length;
+   int rc;
+
+   assert(pattern != NULL);
+   assert(string != NULL);
+
+   match_data = pcre2_match_data_create_from_pattern(pattern, NULL);
+   if (match_data == NULL)
+   {
+      log_error(LOG_LEVEL_ERROR,
+         "Out of memory while matching pattern against %s", string);
+      return FALSE;
+   }
+
+   subject_length = strlen(string);
+   rc = pcre2_match(pattern, (const unsigned char *)string, subject_length,
+      start_offset, 0, match_data, NULL);
+   pcre2_match_data_free(match_data);
+
+   /* Negative return values mean "no match" or an error. */
+   return (0 <= rc);
+}
+#endif
+
+
/*********************************************************************
*
* Function : simple_domaincmp
@@ -1268,8 +1406,12 @@ void free_pattern_spec(struct pattern_sp
{
if (pattern->pattern.tag_regex)
{
+#ifdef HAVE_PCRE2
+ pcre2_code_free(pattern->pattern.tag_regex);
+#else
regfree(pattern->pattern.tag_regex);
freez(pattern->pattern.tag_regex);
+#endif
}
return;
}
@@ -1277,8 +1419,12 @@ void free_pattern_spec(struct pattern_sp
#ifdef FEATURE_PCRE_HOST_PATTERNS
if (pattern->pattern.url_spec.host_regex)
{
+#ifdef HAVE_PCRE2
+ pcre2_code_free(pattern->pattern.url_spec.host_regex);
+#else
regfree(pattern->pattern.url_spec.host_regex);
freez(pattern->pattern.url_spec.host_regex);
+#endif
}
#endif /* def FEATURE_PCRE_HOST_PATTERNS */
freez(pattern->pattern.url_spec.dbuffer);
@@ -1287,8 +1433,12 @@ void free_pattern_spec(struct pattern_sp
freez(pattern->pattern.url_spec.port_list);
if (pattern->pattern.url_spec.preg)
{
+#ifdef HAVE_PCRE2
+ pcre2_code_free(pattern->pattern.url_spec.preg);
+#else
regfree(pattern->pattern.url_spec.preg);
freez(pattern->pattern.url_spec.preg);
+#endif
}
}
@@ -1333,8 +1483,13 @@ static int host_matches(const struct htt
if (pattern->pattern.url_spec.host_regex_type == PCRE_HOST_PATTERN)
{
return ((NULL == pattern->pattern.url_spec.host_regex)
+#ifdef HAVE_PCRE2
+ || pcre2_pattern_matches(pattern->pattern.url_spec.host_regex,
+ http->host));
+#else
|| (0 == regexec(pattern->pattern.url_spec.host_regex,
http->host, 0, NULL, 0)));
+#endif
}
#endif
return ((NULL == pattern->pattern.url_spec.dbuffer) || (0 == domain_match(pattern, http)));
@@ -1357,7 +1512,11 @@ static int host_matches(const struct htt
static int path_matches(const char *path, const struct pattern_spec *pattern)
{
return ((NULL == pattern->pattern.url_spec.preg)
+#ifdef HAVE_PCRE2
+ || (pcre2_pattern_matches(pattern->pattern.url_spec.preg, path)));
+#else
|| (0 == regexec(pattern->pattern.url_spec.preg, path, 0, NULL, 0)));
+#endif
}
--- a/urlmatch.h
+++ b/urlmatch.h
@@ -50,6 +50,10 @@ extern int url_requires_percent_encoding
extern int url_match(const struct pattern_spec *pattern,
const struct http_request *http);
+#ifdef HAVE_PCRE2
+extern int pcre2_pattern_matches(const pcre2_code *pattern, const char *string);
+#endif
+
extern jb_err create_pattern_spec(struct pattern_spec *url, char *buf);
extern void free_pattern_spec(struct pattern_spec *url);
extern int match_portlist(const char *portlist, int port);
--- a/w32log.c
+++ b/w32log.c
@@ -316,6 +316,9 @@ void TermLogWindow(void)
void LogCreatePatternMatchingBuffers(void)
{
int i;
+#ifdef HAVE_PCRE2
+#warning The win32 build of Privoxy is expected to crash when compiled with pcre2 support.
+#endif
for (i = 0; patterns_to_highlight[i].str != NULL; i++)
{
regcomp(&patterns_to_highlight[i].buffer, patterns_to_highlight[i].str, REG_ICASE);