zsh: backport PCRE2 patches and move to it

Backport the PCRE2 patches from upstream and move the package to the PCRE2
library, as PCRE is EOL and will no longer receive security updates.

The patches are backported with minimal changes: only the ChangeLog hunks
are commented out, as they would conflict and it makes no sense to adapt
them for the purpose of backporting.

Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
(cherry picked from commit 5b929fde5f)
This commit is contained in:
Christian Marangi 2023-10-10 12:29:49 +02:00 committed by Josef Schlehofer
parent d6a5230862
commit 02176ce13b
6 changed files with 1060 additions and 2 deletions

View File

@ -9,7 +9,7 @@ include $(TOPDIR)/rules.mk
PKG_NAME:=zsh
PKG_VERSION:=5.9
PKG_RELEASE:=1
PKG_RELEASE:=2
PKG_SOURCE:=$(PKG_NAME)-$(PKG_VERSION).tar.xz
PKG_SOURCE_URL:=@SF/zsh
@ -39,7 +39,7 @@ define Package/zsh
CATEGORY:=Utilities
SUBMENU:=Shells
TITLE:=The Z shell
DEPENDS:=+libcap +libncurses +libncursesw +libpcre +librt
DEPENDS:=+libcap +libncurses +libncursesw +libpcre2 +librt
URL:=https://www.zsh.org/
endef

View File

@ -0,0 +1,71 @@
From 1b421e4978440234fb73117c8505dad1ccc68d46 Mon Sep 17 00:00:00 2001
From: Jun-ichi Takimoto <takimoto-j@kba.biglobe.ne.jp>
Date: Mon, 26 Sep 2022 10:52:50 +0900
Subject: [PATCH] 50658 + test: Enable to switch between C/UTF-8 locales in
PCRE
---
ChangeLog | 5 +++++
Src/Modules/pcre.c | 10 ++--------
Test/V07pcre.ztst | 11 +++++++++++
3 files changed, 18 insertions(+), 8 deletions(-)
# diff --git a/ChangeLog b/ChangeLog
# index 48c65d01b..77345c050 100644
# --- a/ChangeLog
# +++ b/ChangeLog
# @@ -1,3 +1,8 @@
# +2022-09-26 Jun-ichi Takimoto <takimoto-j@kba.biglobe.ne.jp>
# +
# + * 50658 + test: Src/Modules/pcre.c, Test/V07pcre.ztst: Enable to
# + switch between C/UTF-8 locales in PCRE
# +
# 2022-09-25 Peter Stephenson <p.w.stephenson@ntlworld.com>
# * 50648: Functions/Misc/zcalc: Julian Prein: Use ZCALC_HISTFILE
--- a/Src/Modules/pcre.c
+++ b/Src/Modules/pcre.c
@@ -47,8 +47,6 @@ zpcre_utf8_enabled(void)
#if defined(MULTIBYTE_SUPPORT) && defined(HAVE_NL_LANGINFO) && defined(CODESET)
static int have_utf8_pcre = -1;
- /* value can toggle based on MULTIBYTE, so don't
- * be too eager with caching */
if (have_utf8_pcre < -1)
return 0;
@@ -56,15 +54,11 @@ zpcre_utf8_enabled(void)
return 0;
if ((have_utf8_pcre == -1) &&
- (!strcmp(nl_langinfo(CODESET), "UTF-8"))) {
-
- if (pcre_config(PCRE_CONFIG_UTF8, &have_utf8_pcre))
+ (pcre_config(PCRE_CONFIG_UTF8, &have_utf8_pcre))) {
have_utf8_pcre = -2; /* erk, failed to ask */
}
- if (have_utf8_pcre < 0)
- return 0;
- return have_utf8_pcre;
+ return (have_utf8_pcre == 1) && (!strcmp(nl_langinfo(CODESET), "UTF-8"));
#else
return 0;
--- a/Test/V07pcre.ztst
+++ b/Test/V07pcre.ztst
@@ -174,3 +174,14 @@
echo $match[2] )
0:regression for segmentation fault, workers/38307
>test
+
+ LANG_SAVE=$LANG
+ [[ é =~ '^.\z' ]]; echo $?
+ LANG=C
+ [[ é =~ '^..\z' ]]; echo $?
+ LANG=$LANG_SAVE
+ [[ é =~ '^.\z' ]]; echo $?
+0:swich between C/UTF-8 locales
+>0
+>0
+>0

View File

@ -0,0 +1,540 @@
From b62e911341c8ec7446378b477c47da4256053dc0 Mon Sep 17 00:00:00 2001
From: Oliver Kiddle <opk@zsh.org>
Date: Sat, 13 May 2023 00:53:32 +0200
Subject: [PATCH] 51723: migrate pcre module to pcre2
---
ChangeLog | 3 +
Src/Modules/pcre.c | 223 ++++++++++++++++++---------------------------
Test/V07pcre.ztst | 13 ++-
configure.ac | 20 ++--
4 files changed, 110 insertions(+), 149 deletions(-)
# diff --git a/ChangeLog b/ChangeLog
# index f5c77f801..285b73b2c 100644
# --- a/ChangeLog
# +++ b/ChangeLog
# @@ -1,5 +1,8 @@
# 2023-05-13 Oliver Kiddle <opk@zsh.org>
# + * 51723: Src/Modules/pcre.c, Test/V07pcre.ztst, configure.ac:
# + migrate pcre module to pcre2
# +
# * Felipe Contreras: 50612: Misc/vcs_info-examples: fix typo
# * github #98: Vidhan Bhatt: Completion/Darwin/Command/_shortcuts:
--- a/Src/Modules/pcre.c
+++ b/Src/Modules/pcre.c
@@ -34,11 +34,11 @@
#define CPCRE_PLAIN 0
/**/
-#if defined(HAVE_PCRE_COMPILE) && defined(HAVE_PCRE_EXEC)
-#include <pcre.h>
+#if defined(HAVE_PCRE2_COMPILE_8) && defined(HAVE_PCRE2_H)
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
-static pcre *pcre_pattern;
-static pcre_extra *pcre_hints;
+static pcre2_code *pcre_pattern;
/**/
static int
@@ -54,8 +54,8 @@ zpcre_utf8_enabled(void)
return 0;
if ((have_utf8_pcre == -1) &&
- (pcre_config(PCRE_CONFIG_UTF8, &have_utf8_pcre))) {
- have_utf8_pcre = -2; /* erk, failed to ask */
+ (pcre2_config(PCRE2_CONFIG_UNICODE, &have_utf8_pcre))) {
+ have_utf8_pcre = -2; /* erk, failed to ask */
}
return (have_utf8_pcre == 1) && (!strcmp(nl_langinfo(CODESET), "UTF-8"));
@@ -69,47 +69,38 @@ zpcre_utf8_enabled(void)
static int
bin_pcre_compile(char *nam, char **args, Options ops, UNUSED(int func))
{
- int pcre_opts = 0, pcre_errptr, target_len;
- const char *pcre_error;
+ uint32_t pcre_opts = 0;
+ int target_len;
+ int pcre_error;
+ PCRE2_SIZE pcre_offset;
char *target;
- if(OPT_ISSET(ops,'a')) pcre_opts |= PCRE_ANCHORED;
- if(OPT_ISSET(ops,'i')) pcre_opts |= PCRE_CASELESS;
- if(OPT_ISSET(ops,'m')) pcre_opts |= PCRE_MULTILINE;
- if(OPT_ISSET(ops,'x')) pcre_opts |= PCRE_EXTENDED;
- if(OPT_ISSET(ops,'s')) pcre_opts |= PCRE_DOTALL;
+ if (OPT_ISSET(ops, 'a')) pcre_opts |= PCRE2_ANCHORED;
+ if (OPT_ISSET(ops, 'i')) pcre_opts |= PCRE2_CASELESS;
+ if (OPT_ISSET(ops, 'm')) pcre_opts |= PCRE2_MULTILINE;
+ if (OPT_ISSET(ops, 'x')) pcre_opts |= PCRE2_EXTENDED;
+ if (OPT_ISSET(ops, 's')) pcre_opts |= PCRE2_DOTALL;
if (zpcre_utf8_enabled())
- pcre_opts |= PCRE_UTF8;
-
-#ifdef HAVE_PCRE_STUDY
- if (pcre_hints)
-#ifdef PCRE_CONFIG_JIT
- pcre_free_study(pcre_hints);
-#else
- pcre_free(pcre_hints);
-#endif
- pcre_hints = NULL;
-#endif
+ pcre_opts |= PCRE2_UTF;
if (pcre_pattern)
- pcre_free(pcre_pattern);
+ pcre2_code_free(pcre_pattern);
pcre_pattern = NULL;
target = ztrdup(*args);
unmetafy(target, &target_len);
- if ((int)strlen(target) != target_len) {
- zwarnnam(nam, "embedded NULs in PCRE pattern terminate pattern");
- }
-
- pcre_pattern = pcre_compile(target, pcre_opts, &pcre_error, &pcre_errptr, NULL);
+ pcre_pattern = pcre2_compile((PCRE2_SPTR) target, (PCRE2_SIZE) target_len,
+ pcre_opts, &pcre_error, &pcre_offset, NULL);
free(target);
if (pcre_pattern == NULL)
{
- zwarnnam(nam, "error in regex: %s", pcre_error);
+ PCRE2_UCHAR buffer[256];
+ pcre2_get_error_message(pcre_error, buffer, sizeof(buffer));
+ zwarnnam(nam, "error in regex: %s", buffer);
return 1;
}
@@ -117,67 +108,48 @@ bin_pcre_compile(char *nam, char **args,
}
/**/
-#ifdef HAVE_PCRE_STUDY
-
-/**/
static int
bin_pcre_study(char *nam, UNUSED(char **args), UNUSED(Options ops), UNUSED(int func))
{
- const char *pcre_error;
-
if (pcre_pattern == NULL)
{
zwarnnam(nam, "no pattern has been compiled for study");
return 1;
}
-
- if (pcre_hints)
-#ifdef PCRE_CONFIG_JIT
- pcre_free_study(pcre_hints);
-#else
- pcre_free(pcre_hints);
-#endif
- pcre_hints = NULL;
- pcre_hints = pcre_study(pcre_pattern, 0, &pcre_error);
- if (pcre_error != NULL)
- {
- zwarnnam(nam, "error while studying regex: %s", pcre_error);
- return 1;
+ int jit = 0;
+ if (!pcre2_config(PCRE2_CONFIG_JIT, &jit) && jit) {
+ if (pcre2_jit_compile(pcre_pattern, PCRE2_JIT_COMPLETE) < 0) {
+ zwarnnam(nam, "error while studying regex");
+ return 1;
+ }
}
return 0;
}
-/**/
-#else /* !HAVE_PCRE_STUDY */
-
-# define bin_pcre_study bin_notavail
-
-/**/
-#endif /* !HAVE_PCRE_STUDY */
-
-/**/
static int
-zpcre_get_substrings(char *arg, int *ovec, int captured_count, char *matchvar,
- char *substravar, int want_offset_pair, int matchedinarr,
- int want_begin_end)
+zpcre_get_substrings(char *arg, pcre2_match_data *mdata, int captured_count,
+ char *matchvar, char *substravar, int want_offset_pair,
+ int matchedinarr, int want_begin_end)
{
- char **captures, *match_all, **matches;
+ PCRE2_SIZE *ovec;
+ char *match_all, **matches;
char offset_all[50];
int capture_start = 1;
if (matchedinarr) {
- /* bash-style captures[0] entire-matched string in the array */
+ /* bash-style ovec[0] entire-matched string in the array */
capture_start = 0;
}
- /* captures[0] will be entire matched string, [1] first substring */
- if (!pcre_get_substring_list(arg, ovec, captured_count, (const char ***)&captures)) {
- int nelem = arrlen(captures)-1;
+ /* ovec[0] will be entire matched string, [1] first substring */
+ ovec = pcre2_get_ovector_pointer(mdata);
+ if (ovec) {
+ int nelem = captured_count - 1;
/* Set to the offsets of the complete match */
if (want_offset_pair) {
- sprintf(offset_all, "%d %d", ovec[0], ovec[1]);
+ sprintf(offset_all, "%ld %ld", ovec[0], ovec[1]);
setsparam("ZPCRE_OP", ztrdup(offset_all));
}
/*
@@ -186,7 +158,7 @@ zpcre_get_substrings(char *arg, int *ove
* ovec is length 2*(1+capture_list_length)
*/
if (matchvar) {
- match_all = metafy(captures[0], ovec[1] - ovec[0], META_DUP);
+ match_all = metafy(arg + ovec[0], ovec[1] - ovec[0], META_DUP);
setsparam(matchvar, match_all);
}
/*
@@ -201,16 +173,12 @@ zpcre_get_substrings(char *arg, int *ove
*/
if (substravar &&
(!want_begin_end || nelem)) {
- char **x, **y;
+ char **x;
int vec_off, i;
- y = &captures[capture_start];
matches = x = (char **) zalloc(sizeof(char *) * (captured_count+1-capture_start));
- for (i = capture_start; i < captured_count; i++, y++) {
+ for (i = capture_start; i < captured_count; i++) {
vec_off = 2*i;
- if (*y)
- *x++ = metafy(*y, ovec[vec_off+1]-ovec[vec_off], META_DUP);
- else
- *x++ = NULL;
+ *x++ = metafy(arg + ovec[vec_off], ovec[vec_off+1]-ovec[vec_off], META_DUP);
}
*x = NULL;
setaparam(substravar, matches);
@@ -247,7 +215,8 @@ zpcre_get_substrings(char *arg, int *ove
setiparam("MEND", offs + !isset(KSHARRAYS) - 1);
if (nelem) {
char **mbegin, **mend, **bptr, **eptr;
- int i, *ipair;
+ int i;
+ size_t *ipair;
bptr = mbegin = zalloc(sizeof(char*)*(nelem+1));
eptr = mend = zalloc(sizeof(char*)*(nelem+1));
@@ -287,8 +256,6 @@ zpcre_get_substrings(char *arg, int *ove
setaparam("mend", mend);
}
}
-
- pcre_free_substring_list((const char **)captures);
}
return 0;
@@ -314,7 +281,8 @@ getposint(char *instr, char *nam)
static int
bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func))
{
- int ret, capcount, *ovec, ovecsize, c;
+ int ret, c;
+ pcre2_match_data *pcre_mdata = NULL;
char *matched_portion = NULL;
char *plaintext = NULL;
char *receptacle = NULL;
@@ -344,36 +312,30 @@ bin_pcre_match(char *nam, char **args, O
/* For the entire match, 'Return' the offset byte positions instead of the matched string */
if(OPT_ISSET(ops,'b')) want_offset_pair = 1;
- if ((ret = pcre_fullinfo(pcre_pattern, pcre_hints, PCRE_INFO_CAPTURECOUNT, &capcount)))
- {
- zwarnnam(nam, "error %d in fullinfo", ret);
- return 1;
- }
-
- ovecsize = (capcount+1)*3;
- ovec = zalloc(ovecsize*sizeof(int));
-
plaintext = ztrdup(*args);
unmetafy(plaintext, &subject_len);
if (offset_start > 0 && offset_start >= subject_len)
- ret = PCRE_ERROR_NOMATCH;
- else
- ret = pcre_exec(pcre_pattern, pcre_hints, plaintext, subject_len, offset_start, 0, ovec, ovecsize);
+ ret = PCRE2_ERROR_NOMATCH;
+ else {
+ pcre_mdata = pcre2_match_data_create_from_pattern(pcre_pattern, NULL);
+ ret = pcre2_match(pcre_pattern, (PCRE2_SPTR) plaintext, subject_len,
+ offset_start, 0, pcre_mdata, NULL);
+ }
if (ret==0) return_value = 0;
- else if (ret==PCRE_ERROR_NOMATCH) /* no match */;
+ else if (ret == PCRE2_ERROR_NOMATCH) /* no match */;
else if (ret>0) {
- zpcre_get_substrings(plaintext, ovec, ret, matched_portion, receptacle,
+ zpcre_get_substrings(plaintext, pcre_mdata, ret, matched_portion, receptacle,
want_offset_pair, 0, 0);
return_value = 0;
}
else {
- zwarnnam(nam, "error in pcre_exec [%d]", ret);
+ zwarnnam(nam, "error in pcre2_match [%d]", ret);
}
- if (ovec)
- zfree(ovec, ovecsize*sizeof(int));
+ if (pcre_mdata)
+ pcre2_match_data_free(pcre_mdata);
zsfree(plaintext);
return return_value;
@@ -383,17 +345,19 @@ bin_pcre_match(char *nam, char **args, O
static int
cond_pcre_match(char **a, int id)
{
- pcre *pcre_pat;
- const char *pcre_err;
+ pcre2_code *pcre_pat = NULL;
+ int pcre_err;
+ PCRE2_SIZE pcre_erroff;
char *lhstr, *rhre, *lhstr_plain, *rhre_plain, *avar, *svar;
- int r = 0, pcre_opts = 0, pcre_errptr, capcnt, *ov, ovsize;
+ int r = 0, pcre_opts = 0;
+ pcre2_match_data *pcre_mdata = NULL;
int lhstr_plain_len, rhre_plain_len;
int return_value = 0;
if (zpcre_utf8_enabled())
- pcre_opts |= PCRE_UTF8;
+ pcre_opts |= PCRE2_UTF;
if (isset(REMATCHPCRE) && !isset(CASEMATCH))
- pcre_opts |= PCRE_CASELESS;
+ pcre_opts |= PCRE2_CASELESS;
lhstr = cond_str(a,0,0);
rhre = cond_str(a,1,0);
@@ -401,9 +365,6 @@ cond_pcre_match(char **a, int id)
rhre_plain = ztrdup(rhre);
unmetafy(lhstr_plain, &lhstr_plain_len);
unmetafy(rhre_plain, &rhre_plain_len);
- pcre_pat = NULL;
- ov = NULL;
- ovsize = 0;
if (isset(BASHREMATCH)) {
svar = NULL;
@@ -415,27 +376,27 @@ cond_pcre_match(char **a, int id)
switch(id) {
case CPCRE_PLAIN:
- if ((int)strlen(rhre_plain) != rhre_plain_len) {
- zwarn("embedded NULs in PCRE pattern terminate pattern");
- }
- pcre_pat = pcre_compile(rhre_plain, pcre_opts, &pcre_err, &pcre_errptr, NULL);
- if (pcre_pat == NULL) {
- zwarn("failed to compile regexp /%s/: %s", rhre, pcre_err);
+ if (!(pcre_pat = pcre2_compile((PCRE2_SPTR) rhre_plain,
+ (PCRE2_SIZE) rhre_plain_len, pcre_opts,
+ &pcre_err, &pcre_erroff, NULL)))
+ {
+ PCRE2_UCHAR buffer[256];
+ pcre2_get_error_message(pcre_err, buffer, sizeof(buffer));
+ zwarn("failed to compile regexp /%s/: %s", rhre, buffer);
break;
}
- pcre_fullinfo(pcre_pat, NULL, PCRE_INFO_CAPTURECOUNT, &capcnt);
- ovsize = (capcnt+1)*3;
- ov = zalloc(ovsize*sizeof(int));
- r = pcre_exec(pcre_pat, NULL, lhstr_plain, lhstr_plain_len, 0, 0, ov, ovsize);
- /* r < 0 => error; r==0 match but not enough size in ov
+ pcre_mdata = pcre2_match_data_create_from_pattern(pcre_pat, NULL);
+ r = pcre2_match(pcre_pat, (PCRE2_SPTR8) lhstr_plain, lhstr_plain_len,
+ 0, 0, pcre_mdata, NULL);
+ /* r < 0 => error; r==0 match but not enough size in match data
* r > 0 => (r-1) substrings found; r==1 => no substrings
*/
if (r==0) {
- zwarn("reportable zsh problem: pcre_exec() returned 0");
+ zwarn("reportable zsh problem: pcre2_match() returned 0");
return_value = 1;
break;
}
- else if (r==PCRE_ERROR_NOMATCH) {
+ else if (r == PCRE2_ERROR_NOMATCH) {
return_value = 0; /* no match */
break;
}
@@ -444,7 +405,7 @@ cond_pcre_match(char **a, int id)
break;
}
else if (r>0) {
- zpcre_get_substrings(lhstr_plain, ov, r, svar, avar, 0,
+ zpcre_get_substrings(lhstr_plain, pcre_mdata, r, svar, avar, 0,
isset(BASHREMATCH),
!isset(BASHREMATCH));
return_value = 1;
@@ -457,10 +418,10 @@ cond_pcre_match(char **a, int id)
free(lhstr_plain);
if(rhre_plain)
free(rhre_plain);
+ if (pcre_mdata)
+ pcre2_match_data_free(pcre_mdata);
if (pcre_pat)
- pcre_free(pcre_pat);
- if (ov)
- zfree(ov, ovsize*sizeof(int));
+ pcre2_code_free(pcre_pat);
return return_value;
}
@@ -489,11 +450,11 @@ static struct builtin bintab[] = {
static struct features module_features = {
bintab, sizeof(bintab)/sizeof(*bintab),
-#if defined(HAVE_PCRE_COMPILE) && defined(HAVE_PCRE_EXEC)
+#if defined(HAVE_PCRE2_COMPILE_8) && defined(HAVE_PCRE2_H)
cotab, sizeof(cotab)/sizeof(*cotab),
-#else /* !(HAVE_PCRE_COMPILE && HAVE_PCRE_EXEC) */
+#else /* !(HAVE_PCRE2_COMPILE_8 && HAVE_PCRE2_H) */
NULL, 0,
-#endif /* !(HAVE_PCRE_COMPILE && HAVE_PCRE_EXEC) */
+#endif /* !(HAVE_PCRE2_COMPILE_8 && HAVE_PCRE2_H) */
NULL, 0,
NULL, 0,
0
@@ -540,19 +501,9 @@ cleanup_(Module m)
int
finish_(UNUSED(Module m))
{
-#if defined(HAVE_PCRE_COMPILE) && defined(HAVE_PCRE_EXEC)
-#ifdef HAVE_PCRE_STUDY
- if (pcre_hints)
-#ifdef PCRE_CONFIG_JIT
- pcre_free_study(pcre_hints);
-#else
- pcre_free(pcre_hints);
-#endif
- pcre_hints = NULL;
-#endif
-
+#if defined(HAVE_PCRE2_COMPILE_8) && defined(HAVE_PCRE2_H)
if (pcre_pattern)
- pcre_free(pcre_pattern);
+ pcre2_code_free(pcre_pattern);
pcre_pattern = NULL;
#endif
--- a/Test/V07pcre.ztst
+++ b/Test/V07pcre.ztst
@@ -129,12 +129,17 @@
>78884; ZPCRE_OP: 25 30
>90210; ZPCRE_OP: 31 36
-# Embedded NULs allowed in plaintext, but not in RE (although \0 as two-chars allowed)
+# Embedded NULs allowed in plaintext, in RE, pcre supports \0 as two-chars
[[ $'a\0bc\0d' =~ '^(a\0.)(.+)$' ]]
print "${#MATCH}; ${#match[1]}; ${#match[2]}"
0:ensure ASCII NUL passes in and out of matched plaintext
>6; 3; 3
+# PCRE2 supports NULs also in the RE
+ [[ $'a\0b\0c' =~ $'^(.\0)+' ]] && print "${#MATCH}; ${#match[1]}"
+0:ensure ASCII NUL works also in the regex
+>4; 2
+
# Ensure the long-form infix operator works
[[ foo -pcre-match ^f..$ ]]
print $?
@@ -181,7 +186,11 @@
[[ é =~ '^..\z' ]]; echo $?
LANG=$LANG_SAVE
[[ é =~ '^.\z' ]]; echo $?
-0:swich between C/UTF-8 locales
+0:switch between C/UTF-8 locales
>0
>0
>0
+
+ [[ abc =~ 'a(d*)bc' ]] && print "$#MATCH; $#match; ${#match[1]}"
+0:empty capture
+>3; 1; 0
--- a/configure.ac
+++ b/configure.ac
@@ -438,7 +438,7 @@ fi],
dnl Do you want to look for pcre support?
AC_ARG_ENABLE(pcre,
-AS_HELP_STRING([--enable-pcre],[enable the search for the pcre library (may create run-time library dependencies)]))
+AS_HELP_STRING([--enable-pcre],[enable the search for the pcre2 library (may create run-time library dependencies)]))
dnl Do you want to look for capability support?
AC_ARG_ENABLE(cap,
@@ -662,13 +662,12 @@ AC_HEADER_SYS_WAIT
oldcflags="$CFLAGS"
if test x$enable_pcre = xyes; then
-AC_CHECK_PROG([PCRECONF], pcre-config, pcre-config)
-dnl Typically (meaning on this single RedHat 9 box in front of me)
-dnl pcre-config --cflags produces a -I output which needs to go into
+AC_CHECK_PROG([PCRECONF], pcre2-config, pcre2-config)
+dnl pcre2-config --cflags may produce a -I output which needs to go into
dnl CPPFLAGS else configure's preprocessor tests don't pick it up,
dnl producing a warning.
-if test "x$ac_cv_prog_PCRECONF" = xpcre-config; then
- CPPFLAGS="$CPPFLAGS `pcre-config --cflags`"
+if test "x$ac_cv_prog_PCRECONF" = xpcre2-config; then
+ CPPFLAGS="$CPPFLAGS `pcre2-config --cflags`"
fi
fi
@@ -678,9 +677,10 @@ AC_CHECK_HEADERS(sys/time.h sys/times.h
locale.h errno.h stdio.h stdarg.h varargs.h stdlib.h \
unistd.h sys/capability.h \
utmp.h utmpx.h sys/types.h pwd.h grp.h poll.h sys/mman.h \
- netinet/in_systm.h pcre.h langinfo.h wchar.h stddef.h \
+ netinet/in_systm.h langinfo.h wchar.h stddef.h \
sys/stropts.h iconv.h ncurses.h ncursesw/ncurses.h \
ncurses/ncurses.h)
+AC_CHECK_HEADERS([pcre2.h],,,[#define PCRE2_CODE_UNIT_WIDTH 8])
if test x$dynamic = xyes; then
AC_CHECK_HEADERS(dlfcn.h)
AC_CHECK_HEADERS(dl.h)
@@ -958,9 +958,7 @@ if test "x$ac_found_iconv" = "xyes"; the
fi
if test x$enable_pcre = xyes; then
-dnl pcre-config should probably be employed here
-dnl AC_SEARCH_LIBS(pcre_compile, pcre)
- LIBS="`$ac_cv_prog_PCRECONF --libs` $LIBS"
+ LIBS="`$ac_cv_prog_PCRECONF --libs8` $LIBS"
fi
dnl ---------------------
@@ -1323,7 +1321,7 @@ AC_CHECK_FUNCS(strftime strptime mktime
pathconf sysconf \
tgetent tigetflag tigetnum tigetstr setupterm initscr resize_term \
getcchar setcchar waddwstr wget_wch win_wch use_default_colors \
- pcre_compile pcre_study pcre_exec \
+ pcre2_compile_8 \
nl_langinfo \
erand48 open_memstream \
posix_openpt \

View File

@ -0,0 +1,180 @@
From f3f371deb376478176866fd770fbcf9bc0d0609f Mon Sep 17 00:00:00 2001
From: Oliver Kiddle <opk@zsh.org>
Date: Sat, 13 May 2023 00:56:48 +0200
Subject: [PATCH] 51728: assign pcre named capture groups to a hash
---
ChangeLog | 3 +++
Doc/Zsh/mod_pcre.yo | 10 ++++++----
Src/Modules/pcre.c | 43 +++++++++++++++++++++++++++++++++----------
Test/V07pcre.ztst | 14 ++++++++++++++
4 files changed, 56 insertions(+), 14 deletions(-)
# diff --git a/ChangeLog b/ChangeLog
# index 285b73b2c..2835a9405 100644
# --- a/ChangeLog
# +++ b/ChangeLog
# @@ -1,5 +1,8 @@
# 2023-05-13 Oliver Kiddle <opk@zsh.org>
# + * 51728: Doc/Zsh/mod_pcre.yo, Src/Modules/pcre.c,
# + Test/V07pcre.ztst: assign pcre named capture groups to a hash
# +
# * 51723: Src/Modules/pcre.c, Test/V07pcre.ztst, configure.ac:
# migrate pcre module to pcre2
--- a/Doc/Zsh/mod_pcre.yo
+++ b/Doc/Zsh/mod_pcre.yo
@@ -20,12 +20,12 @@ including those that indicate newline.
)
findex(pcre_study)
item(tt(pcre_study))(
-Studies the previously-compiled PCRE which may result in faster
-matching.
+Requests JIT compilation for the previously-compiled PCRE which
+may result in faster matching.
)
findex(pcre_match)
item(tt(pcre_match) [ tt(-v) var(var) ] [ tt(-a) var(arr) ] \
-[ tt(-n) var(offset) ] [ tt(-b) ] var(string))(
+[ tt(-A) var(assoc) ] [ tt(-n) var(offset) ] [ tt(-b) ] var(string))(
Returns successfully if tt(string) matches the previously-compiled
PCRE.
@@ -36,7 +36,9 @@ substrings, unless the tt(-a) option is
case it will set the array var(arr). Similarly, the variable
tt(MATCH) will be set to the entire matched portion of the
string, unless the tt(-v) option is given, in which case the variable
-var(var) will be set.
+var(var) will be set. Furthermore, any named captures will
+be stored in the associative array tt(.pcre.match) unless an
+alternative is given with tt(-A).
No variables are altered if there is no successful match.
A tt(-n) option starts searching for a match from the
byte var(offset) position in var(string). If the tt(-b) option is given,
--- a/Src/Modules/pcre.c
+++ b/Src/Modules/pcre.c
@@ -129,14 +129,17 @@ bin_pcre_study(char *nam, UNUSED(char **
}
static int
-zpcre_get_substrings(char *arg, pcre2_match_data *mdata, int captured_count,
- char *matchvar, char *substravar, int want_offset_pair,
- int matchedinarr, int want_begin_end)
+zpcre_get_substrings(pcre2_code *pat, char *arg, pcre2_match_data *mdata,
+ int captured_count, char *matchvar, char *substravar, char *namedassoc,
+ int want_offset_pair, int matchedinarr, int want_begin_end)
{
PCRE2_SIZE *ovec;
char *match_all, **matches;
char offset_all[50];
int capture_start = 1;
+ int vec_off;
+ PCRE2_SPTR ntable; /* table of named captures */
+ uint32_t ncount, nsize;
if (matchedinarr) {
/* bash-style ovec[0] entire-matched string in the array */
@@ -174,7 +177,7 @@ zpcre_get_substrings(char *arg, pcre2_ma
if (substravar &&
(!want_begin_end || nelem)) {
char **x;
- int vec_off, i;
+ int i;
matches = x = (char **) zalloc(sizeof(char *) * (captured_count+1-capture_start));
for (i = capture_start; i < captured_count; i++) {
vec_off = 2*i;
@@ -184,6 +187,23 @@ zpcre_get_substrings(char *arg, pcre2_ma
setaparam(substravar, matches);
}
+ if (!pcre2_pattern_info(pat, PCRE2_INFO_NAMECOUNT, &ncount) && ncount
+ && !pcre2_pattern_info(pat, PCRE2_INFO_NAMEENTRYSIZE, &nsize)
+ && !pcre2_pattern_info(pat, PCRE2_INFO_NAMETABLE, &ntable))
+ {
+ char **hash, **hashptr;
+ uint32_t nidx;
+ hashptr = hash = (char **)zshcalloc((ncount+1)*2*sizeof(char *));
+ for (nidx = 0; nidx < ncount; nidx++) {
+ vec_off = (ntable[nsize * nidx] << 9) + 2 * ntable[nsize * nidx + 1];
+ /* would metafy the key but pcre limits characters in the name */
+ *hashptr++ = ztrdup((char *) ntable + nsize * nidx + 2);
+ *hashptr++ = metafy(arg + ovec[vec_off],
+ ovec[vec_off+1]-ovec[vec_off], META_DUP);
+ }
+ sethparam(namedassoc, hash);
+ }
+
if (want_begin_end) {
/*
* cond-infix rather than builtin; also not bash; so we set a bunch
@@ -286,6 +306,7 @@ bin_pcre_match(char *nam, char **args, O
char *matched_portion = NULL;
char *plaintext = NULL;
char *receptacle = NULL;
+ char *named = ".pcre.match";
int return_value = 1;
/* The subject length and offset start are both int values in pcre_exec */
int subject_len;
@@ -305,6 +326,9 @@ bin_pcre_match(char *nam, char **args, O
if(OPT_HASARG(ops,c='v')) {
matched_portion = OPT_ARG(ops,c);
}
+ if (OPT_HASARG(ops, c='A')) {
+ named = OPT_ARG(ops, c);
+ }
if(OPT_HASARG(ops,c='n')) { /* The offset position to start the search, in bytes. */
if ((offset_start = getposint(OPT_ARG(ops,c), nam)) < 0)
return 1;
@@ -326,8 +350,8 @@ bin_pcre_match(char *nam, char **args, O
if (ret==0) return_value = 0;
else if (ret == PCRE2_ERROR_NOMATCH) /* no match */;
else if (ret>0) {
- zpcre_get_substrings(plaintext, pcre_mdata, ret, matched_portion, receptacle,
- want_offset_pair, 0, 0);
+ zpcre_get_substrings(pcre_pattern, plaintext, pcre_mdata, ret, matched_portion,
+ receptacle, named, want_offset_pair, 0, 0);
return_value = 0;
}
else {
@@ -405,9 +429,8 @@ cond_pcre_match(char **a, int id)
break;
}
else if (r>0) {
- zpcre_get_substrings(lhstr_plain, pcre_mdata, r, svar, avar, 0,
- isset(BASHREMATCH),
- !isset(BASHREMATCH));
+ zpcre_get_substrings(pcre_pat, lhstr_plain, pcre_mdata, r, svar, avar,
+ ".pcre.match", 0, isset(BASHREMATCH), !isset(BASHREMATCH));
return_value = 1;
break;
}
@@ -443,7 +466,7 @@ static struct conddef cotab[] = {
static struct builtin bintab[] = {
BUILTIN("pcre_compile", 0, bin_pcre_compile, 1, 1, 0, "aimxs", NULL),
- BUILTIN("pcre_match", 0, bin_pcre_match, 1, 1, 0, "a:v:n:b", NULL),
+ BUILTIN("pcre_match", 0, bin_pcre_match, 1, 1, 0, "A:a:v:n:b", NULL),
BUILTIN("pcre_study", 0, bin_pcre_study, 0, 0, 0, NULL, NULL)
};
--- a/Test/V07pcre.ztst
+++ b/Test/V07pcre.ztst
@@ -194,3 +194,17 @@
[[ abc =~ 'a(d*)bc' ]] && print "$#MATCH; $#match; ${#match[1]}"
0:empty capture
>3; 1; 0
+
+ [[ category/name-12345 =~ '(?x)^
+ (?<category> [^/]* ) /
+ (?<package>
+ (?<name> \w+ ) -
+ (?<version> \d+ ))$' ]]
+ typeset -p1 .pcre.match
+0:named captures
+>typeset -g -A .pcre.match=(
+> [category]=category
+> [name]=name
+> [package]=name-12345
+> [version]=12345
+>)

View File

@ -0,0 +1,156 @@
From b4d1c756f50909b4a13e5c8fe5f26f71e9d54f63 Mon Sep 17 00:00:00 2001
From: Oliver Kiddle <opk@zsh.org>
Date: Sat, 13 May 2023 00:59:00 +0200
Subject: [PATCH] 51738: support pcre's alternative DFA matching algorithm
---
ChangeLog | 3 +++
Doc/Zsh/mod_pcre.yo | 6 ++++-
Src/Modules/pcre.c | 53 ++++++++++++++++++++++++++++++---------------
Test/V07pcre.ztst | 5 +++++
4 files changed, 49 insertions(+), 18 deletions(-)
# diff --git a/ChangeLog b/ChangeLog
# index 2835a9405..18bc4a698 100644
# --- a/ChangeLog
# +++ b/ChangeLog
# @@ -1,5 +1,8 @@
# 2023-05-13 Oliver Kiddle <opk@zsh.org>
# + * 51738: Doc/Zsh/mod_pcre.yo, Src/Modules/pcre.c,
# + Test/V07pcre.ztst: support pcre's DFA matching algorithm
# +
# * 51728: Doc/Zsh/mod_pcre.yo, Src/Modules/pcre.c,
# Test/V07pcre.ztst: assign pcre named capture groups to a hash
--- a/Doc/Zsh/mod_pcre.yo
+++ b/Doc/Zsh/mod_pcre.yo
@@ -25,7 +25,7 @@ may result in faster matching.
)
findex(pcre_match)
item(tt(pcre_match) [ tt(-v) var(var) ] [ tt(-a) var(arr) ] \
-[ tt(-A) var(assoc) ] [ tt(-n) var(offset) ] [ tt(-b) ] var(string))(
+[ tt(-A) var(assoc) ] [ tt(-n) var(offset) ] [ tt(-bd) ] var(string))(
Returns successfully if tt(string) matches the previously-compiled
PCRE.
@@ -69,6 +69,10 @@ print -l $accum)
)
enditem()
+The option tt(-d) uses the alternative breadth-first DFA search algorithm of
+pcre. This sets tt(match), or the array given with tt(-a), to all the matches
+found from the same start point in the subject.
+
The tt(zsh/pcre) module makes available the following test condition:
startitem()
--- a/Src/Modules/pcre.c
+++ b/Src/Modules/pcre.c
@@ -305,30 +305,29 @@ bin_pcre_match(char *nam, char **args, O
pcre2_match_data *pcre_mdata = NULL;
char *matched_portion = NULL;
char *plaintext = NULL;
- char *receptacle = NULL;
- char *named = ".pcre.match";
+ char *receptacle;
+ char *named = NULL;
int return_value = 1;
/* The subject length and offset start are both int values in pcre_exec */
int subject_len;
int offset_start = 0;
int want_offset_pair = 0;
+ int use_dfa = 0;
if (pcre_pattern == NULL) {
zwarnnam(nam, "no pattern has been compiled");
return 1;
}
- matched_portion = "MATCH";
- receptacle = "match";
- if(OPT_HASARG(ops,c='a')) {
- receptacle = OPT_ARG(ops,c);
- }
- if(OPT_HASARG(ops,c='v')) {
- matched_portion = OPT_ARG(ops,c);
- }
- if (OPT_HASARG(ops, c='A')) {
- named = OPT_ARG(ops, c);
+ if (!(use_dfa = OPT_ISSET(ops, 'd'))) {
+ matched_portion = OPT_HASARG(ops, c='v') ? OPT_ARG(ops, c) : "MATCH";
+ named = OPT_HASARG(ops, c='A') ? OPT_ARG(ops, c) : ".pcre.match";
+ } else if (OPT_HASARG(ops, c='v') || OPT_HASARG(ops, c='A')) {
+ zwarnnam(nam, "-d cannot be combined with -%c", c);
+ return 1;
}
+ receptacle = OPT_HASARG(ops, 'a') ? OPT_ARG(ops, 'a') : "match";
+
if(OPT_HASARG(ops,c='n')) { /* The offset position to start the search, in bytes. */
if ((offset_start = getposint(OPT_ARG(ops,c), nam)) < 0)
return 1;
@@ -341,7 +340,25 @@ bin_pcre_match(char *nam, char **args, O
if (offset_start > 0 && offset_start >= subject_len)
ret = PCRE2_ERROR_NOMATCH;
- else {
+ else if (use_dfa) {
+ PCRE2_SIZE old, wscount = 128, capcount = 128;
+ void *workspace = zhalloc(sizeof(int) * wscount);
+ pcre_mdata = pcre2_match_data_create(capcount, NULL);
+ do {
+ ret = pcre2_dfa_match(pcre_pattern, (PCRE2_SPTR) plaintext, subject_len,
+ offset_start, 0, pcre_mdata, NULL, (int *) workspace, wscount);
+ if (ret == PCRE2_ERROR_DFA_WSSIZE) {
+ old = wscount;
+ wscount += wscount / 2;
+ workspace = hrealloc(workspace, sizeof(int) * old, sizeof(int) * wscount);
+ } else if (ret == 0) {
+ capcount += capcount / 2;
+ pcre2_match_data_free(pcre_mdata);
+ pcre_mdata = pcre2_match_data_create(capcount, NULL);
+ } else
+ break;
+ } while(1);
+ } else {
pcre_mdata = pcre2_match_data_create_from_pattern(pcre_pattern, NULL);
ret = pcre2_match(pcre_pattern, (PCRE2_SPTR) plaintext, subject_len,
offset_start, 0, pcre_mdata, NULL);
@@ -350,12 +367,14 @@ bin_pcre_match(char *nam, char **args, O
if (ret==0) return_value = 0;
else if (ret == PCRE2_ERROR_NOMATCH) /* no match */;
else if (ret>0) {
- zpcre_get_substrings(pcre_pattern, plaintext, pcre_mdata, ret, matched_portion,
- receptacle, named, want_offset_pair, 0, 0);
+ zpcre_get_substrings(pcre_pattern, plaintext, pcre_mdata, ret,
+ matched_portion, receptacle, named, want_offset_pair, use_dfa, 0);
return_value = 0;
}
else {
- zwarnnam(nam, "error in pcre2_match [%d]", ret);
+ PCRE2_UCHAR buffer[256];
+ pcre2_get_error_message(ret, buffer, sizeof(buffer));
+ zwarnnam(nam, "error in pcre matching for /%s/: %s", plaintext, buffer);
}
if (pcre_mdata)
@@ -466,7 +485,7 @@ static struct conddef cotab[] = {
static struct builtin bintab[] = {
BUILTIN("pcre_compile", 0, bin_pcre_compile, 1, 1, 0, "aimxs", NULL),
- BUILTIN("pcre_match", 0, bin_pcre_match, 1, 1, 0, "A:a:v:n:b", NULL),
+ BUILTIN("pcre_match", 0, bin_pcre_match, 1, 1, 0, "A:a:v:n:bd", NULL),
BUILTIN("pcre_study", 0, bin_pcre_study, 0, 0, 0, NULL, NULL)
};
--- a/Test/V07pcre.ztst
+++ b/Test/V07pcre.ztst
@@ -208,3 +208,8 @@
> [package]=name-12345
> [version]=12345
>)
+
+ pcre_compile 'cat(er(pillar)?)?'
+ pcre_match -d 'the caterpillar catchment' && print $match
+0:pcre_match -d
+>caterpillar cater cat

View File

@ -0,0 +1,111 @@
From 10bdbd8b5b0b43445aff23dcd412f25cf6aa328a Mon Sep 17 00:00:00 2001
From: Jun-ichi Takimoto <takimoto-j@kba.biglobe.ne.jp>
Date: Tue, 20 Jun 2023 18:14:27 +0900
Subject: [PATCH] 51877: do not build pcre module if pcre2-config is not found
---
ChangeLog | 5 +++++
Src/Modules/pcre.mdd | 2 +-
configure.ac | 31 +++++++++++++++++++------------
3 files changed, 25 insertions(+), 13 deletions(-)
# diff --git a/ChangeLog b/ChangeLog
# index 14349dcf2..e89ffee1b 100644
# --- a/ChangeLog
# +++ b/ChangeLog
# @@ -1,3 +1,8 @@
# +2023-06-20 Jun-ichi Takimoto <takimoto-j@kba.biglobe.ne.jp>
# +
# + * 51877: Src/Modules/pcre.mdd, configure.ac: do not build pcre
# + module if pcre2-config is not available.
# +
# 2023-06-19 Jun-ichi Takimoto <takimoto-j@kba.biglobe.ne.jp>
# * 51862: Doc/Makefile.in, configure.ac: support texinfo-7.0
--- a/Src/Modules/pcre.mdd
+++ b/Src/Modules/pcre.mdd
@@ -1,5 +1,5 @@
name=zsh/pcre
-link=`if test x$enable_pcre = xyes && (pcre-config --version >/dev/null 2>/dev/null); then echo dynamic; else echo no; fi`
+link=`if test x$enable_pcre = xyes; then echo dynamic; else echo no; fi`
load=no
autofeatures="b:pcre_compile b:pcre_study b:pcre_match"
--- a/configure.ac
+++ b/configure.ac
@@ -440,6 +440,17 @@ dnl Do you want to look for pcre support
AC_ARG_ENABLE(pcre,
AS_HELP_STRING([--enable-pcre],[enable the search for the pcre2 library (may create run-time library dependencies)]))
+AC_ARG_VAR(PCRE_CONFIG, [pathname of pcre2-config if it is not in PATH])
+if test "x$enable_pcre" = xyes; then
+ AC_CHECK_PROG([PCRE_CONFIG], pcre2-config, pcre2-config)
+ if test "x$PCRE_CONFIG" = x; then
+ enable_pcre=no
+ AC_MSG_WARN([pcre2-config not found: pcre module is disabled.])
+ AC_MSG_NOTICE(
+ [Set PCRE_CONFIG to pathname of pcre2-config if it is not in PATH.])
+ fi
+fi
+
dnl Do you want to look for capability support?
AC_ARG_ENABLE(cap,
AS_HELP_STRING([--enable-cap],[enable the search for POSIX capabilities (may require additional headers to be added by hand)]))
@@ -660,15 +671,12 @@ AC_HEADER_DIRENT
AC_HEADER_STAT
AC_HEADER_SYS_WAIT
-oldcflags="$CFLAGS"
-if test x$enable_pcre = xyes; then
-AC_CHECK_PROG([PCRECONF], pcre2-config, pcre2-config)
dnl pcre2-config --cflags may produce a -I output which needs to go into
dnl CPPFLAGS else configure's preprocessor tests don't pick it up,
dnl producing a warning.
-if test "x$ac_cv_prog_PCRECONF" = xpcre2-config; then
- CPPFLAGS="$CPPFLAGS `pcre2-config --cflags`"
-fi
+if test "x$enable_pcre" = xyes; then
+ CPPFLAGS="`$PCRE_CONFIG --cflags` $CPPFLAGS"
+ AC_CHECK_HEADERS([pcre2.h],,,[#define PCRE2_CODE_UNIT_WIDTH 8])
fi
AC_CHECK_HEADERS(sys/time.h sys/times.h sys/select.h termcap.h termio.h \
@@ -680,7 +688,6 @@ AC_CHECK_HEADERS(sys/time.h sys/times.h
netinet/in_systm.h langinfo.h wchar.h stddef.h \
sys/stropts.h iconv.h ncurses.h ncursesw/ncurses.h \
ncurses/ncurses.h)
-AC_CHECK_HEADERS([pcre2.h],,,[#define PCRE2_CODE_UNIT_WIDTH 8])
if test x$dynamic = xyes; then
AC_CHECK_HEADERS(dlfcn.h)
AC_CHECK_HEADERS(dl.h)
@@ -957,10 +964,6 @@ if test "x$ac_found_iconv" = "xyes"; the
[Define as const if the declaration of iconv() needs const.])
fi
-if test x$enable_pcre = xyes; then
- LIBS="`$ac_cv_prog_PCRECONF --libs8` $LIBS"
-fi
-
dnl ---------------------
dnl CHECK TERMCAP LIBRARY
dnl ---------------------
@@ -1321,7 +1324,6 @@ AC_CHECK_FUNCS(strftime strptime mktime
pathconf sysconf \
tgetent tigetflag tigetnum tigetstr setupterm initscr resize_term \
getcchar setcchar waddwstr wget_wch win_wch use_default_colors \
- pcre2_compile_8 \
nl_langinfo \
erand48 open_memstream \
posix_openpt \
@@ -1376,6 +1378,11 @@ if test x$zsh_cv_func_realpath_accepts_n
AC_DEFINE(REALPATH_ACCEPTS_NULL)
fi
+if test x$enable_pcre = xyes; then
+ LIBS="`$PCRE_CONFIG --libs8` $LIBS"
+ AC_CHECK_FUNCS(pcre2_compile_8)
+fi
+
if test x$enable_cap = xyes; then
AC_CHECK_FUNCS(cap_get_proc)
fi