openwrt-packages/libs/pcre/patches/001-pcre-8.38-upstream_fixe...

1409 lines
45 KiB
Diff

Submitted By: Ken Moffat <ken at linuxfromscratch dot org>
Date: 2016-03-16
Initial Package Version: 8.38
Upstream Status: Applied
Origin: Upstream, backported to 8.38 by Petr Písař at redhat
Description: Various fixes, including for CVE-2016-1283 and many other
bugs which have been fixed upstream. Many of these bugs were found by
fuzzing; upstream is trying to persuade its users to move to pcre2 and is
giving low priority to further pcre1 maintenance releases.
From 3c80e02cd464ea049e117b423fd48fab294c51a9 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
Date: Thu, 26 Nov 2015 20:29:13 +0000
Subject: [PATCH] Fix auto-callout (?# comment bug.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1611 2f5784b3-3f2a-0410-8824-cb99058d5e15
Petr Pisar: Ported to 8.38.
diff --git a/pcre_compile.c b/pcre_compile.c
index 4d3b313..3360a8b 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -4699,6 +4699,23 @@ for (;; ptr++)
}
}
+ /* Skip over (?# comments. We need to do this here because we want to know if
+ the next thing is a quantifier, and these comments may come between an item
+ and its quantifier. */
+
+ if (c == CHAR_LEFT_PARENTHESIS && ptr[1] == CHAR_QUESTION_MARK &&
+ ptr[2] == CHAR_NUMBER_SIGN)
+ {
+ ptr += 3;
+ while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
+ if (*ptr == CHAR_NULL)
+ {
+ *errorcodeptr = ERR18;
+ goto FAILED;
+ }
+ continue;
+ }
+
/* See if the next thing is a quantifier. */
is_quantifier =
@@ -6529,21 +6546,6 @@ for (;; ptr++)
case CHAR_LEFT_PARENTHESIS:
ptr++;
- /* First deal with comments. Putting this code right at the start ensures
- that comments have no bad side effects. */
-
- if (ptr[0] == CHAR_QUESTION_MARK && ptr[1] == CHAR_NUMBER_SIGN)
- {
- ptr += 2;
- while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
- if (*ptr == CHAR_NULL)
- {
- *errorcodeptr = ERR18;
- goto FAILED;
- }
- continue;
- }
-
/* Now deal with various "verbs" that can be introduced by '*'. */
if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':'
diff --git a/testdata/testinput2 b/testdata/testinput2
index e2e520f..92e3359 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4217,4 +4217,12 @@ backtracking verbs. --/
/a[[:punct:]b]/BZ
+/L(?#(|++<!(2)?/BZ
+
+/L(?#(|++<!(2)?/BOZ
+
+/L(?#(|++<!(2)?/BCZ
+
+/L(?#(|++<!(2)?/BCOZ
+
/-- End of testinput2 --/
diff --git a/testdata/testinput7 b/testdata/testinput7
index e411a4b..00b9738 100644
--- a/testdata/testinput7
+++ b/testdata/testinput7
@@ -853,4 +853,8 @@ of case for anything other than the ASCII letters. --/
/a[b[:punct:]]/8WBZ
+/L(?#(|++<!(2)?/B8COZ
+
+/L(?#(|++<!(2)?/B8WCZ
+
/-- End of testinput7 --/
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 85c565d..2cf7a90 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -14574,4 +14574,40 @@ No match
End
------------------------------------------------------------------
+/L(?#(|++<!(2)?/BZ
+------------------------------------------------------------------
+ Bra
+ L?+
+ Ket
+ End
+------------------------------------------------------------------
+
+/L(?#(|++<!(2)?/BOZ
+------------------------------------------------------------------
+ Bra
+ L?
+ Ket
+ End
+------------------------------------------------------------------
+
+/L(?#(|++<!(2)?/BCZ
+------------------------------------------------------------------
+ Bra
+ Callout 255 0 14
+ L?+
+ Callout 255 14 0
+ Ket
+ End
+------------------------------------------------------------------
+
+/L(?#(|++<!(2)?/BCOZ
+------------------------------------------------------------------
+ Bra
+ Callout 255 0 14
+ L?
+ Callout 255 14 0
+ Ket
+ End
+------------------------------------------------------------------
+
/-- End of testinput2 --/
diff --git a/testdata/testoutput7 b/testdata/testoutput7
index cc9ebdd..fdfff64 100644
--- a/testdata/testoutput7
+++ b/testdata/testoutput7
@@ -2348,4 +2348,24 @@ No match
End
------------------------------------------------------------------
+/L(?#(|++<!(2)?/B8COZ
+------------------------------------------------------------------
+ Bra
+ Callout 255 0 14
+ L?
+ Callout 255 14 0
+ Ket
+ End
+------------------------------------------------------------------
+
+/L(?#(|++<!(2)?/B8WCZ
+------------------------------------------------------------------
+ Bra
+ Callout 255 0 14
+ L?+
+ Callout 255 14 0
+ Ket
+ End
+------------------------------------------------------------------
+
/-- End of testinput7 --/
--
2.4.3
From ef6b10fcde41a2687f38d4a9ff2886b037948a1b Mon Sep 17 00:00:00 2001
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
Date: Fri, 27 Nov 2015 17:13:13 +0000
Subject: [PATCH 1/5] Fix negated POSIX class within negated overall class UCP
bug.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1612 2f5784b3-3f2a-0410-8824-cb99058d5e15
Petr Písař: Ported to 8.38.
diff --git a/pcre_compile.c b/pcre_compile.c
index 3360a8b..3670f1e 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -5063,20 +5063,22 @@ for (;; ptr++)
ptr = tempptr + 1;
continue;
- /* For the other POSIX classes (ascii, xdigit) we are going to fall
- through to the non-UCP case and build a bit map for characters with
- code points less than 256. If we are in a negated POSIX class
- within a non-negated overall class, characters with code points
- greater than 255 must all match. In the special case where we have
- not yet generated any xclass data, and this is the final item in
- the overall class, we need do nothing: later on, the opcode
+ /* For the other POSIX classes (ascii, cntrl, xdigit) we are going
+ to fall through to the non-UCP case and build a bit map for
+ characters with code points less than 256. If we are in a negated
+ POSIX class, characters with code points greater than 255 must
+ either all match or all not match. In the special case where we
+ have not yet generated any xclass data, and this is the final item
+ in the overall class, we need do nothing: later on, the opcode
OP_NCLASS will be used to indicate that characters greater than 255
are acceptable. If we have already seen an xclass item or one may
follow (we have to assume that it might if this is not the end of
- the class), explicitly match all wide codepoints. */
+ the class), explicitly list all wide codepoints, which will then
+ either not match or match, depending on whether the class is or is
+ not negated. */
default:
- if (!negate_class && local_negate &&
+ if (local_negate &&
(xclass || tempptr[2] != CHAR_RIGHT_SQUARE_BRACKET))
{
*class_uchardata++ = XCL_RANGE;
diff --git a/testdata/testinput6 b/testdata/testinput6
index aeb62a0..a178d3d 100644
--- a/testdata/testinput6
+++ b/testdata/testinput6
@@ -1553,4 +1553,13 @@
\x{200}
\x{37e}
+/[^[:^ascii:]\d]/8W
+ a
+ ~
+ 0
+ \a
+ \x{7f}
+ \x{389}
+ \x{20ac}
+
/-- End of testinput6 --/
diff --git a/testdata/testoutput6 b/testdata/testoutput6
index beb85aa..b64dc0d 100644
--- a/testdata/testoutput6
+++ b/testdata/testoutput6
@@ -2557,4 +2557,20 @@ No match
\x{37e}
0: \x{37e}
+/[^[:^ascii:]\d]/8W
+ a
+ 0: a
+ ~
+ 0: ~
+ 0
+No match
+ \a
+ 0: \x{07}
+ \x{7f}
+ 0: \x{7f}
+ \x{389}
+No match
+ \x{20ac}
+No match
+
/-- End of testinput6 --/
--
2.4.3
From bfc1dfa660c24dc7a75108d934290e50d7db2719 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
Date: Fri, 27 Nov 2015 17:41:04 +0000
Subject: [PATCH 2/5] Fix bug for isolated \E between an item and its qualifier
when auto callout is set.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1613 2f5784b3-3f2a-0410-8824-cb99058d5e15
Petr Písař: Ported to 8.38.
diff --git a/pcre_compile.c b/pcre_compile.c
index 3670f1e..5786cd3 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -4645,9 +4645,10 @@ for (;; ptr++)
goto FAILED;
}
- /* If in \Q...\E, check for the end; if not, we have a literal */
+ /* If in \Q...\E, check for the end; if not, we have a literal. Otherwise an
+ isolated \E is ignored. */
- if (inescq && c != CHAR_NULL)
+ if (c != CHAR_NULL)
{
if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)
{
@@ -4655,7 +4656,7 @@ for (;; ptr++)
ptr++;
continue;
}
- else
+ else if (inescq)
{
if (previous_callout != NULL)
{
@@ -4670,7 +4671,6 @@ for (;; ptr++)
}
goto NORMAL_CHAR;
}
- /* Control does not reach here. */
}
/* In extended mode, skip white space and comments. We need a loop in order
diff --git a/testdata/testinput2 b/testdata/testinput2
index 92e3359..e8ca4fe 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4225,4 +4225,6 @@ backtracking verbs. --/
/L(?#(|++<!(2)?/BCOZ
+/(A*)\E+/CBZ
+
/-- End of testinput2 --/
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 2cf7a90..09756b8 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -14610,4 +14610,18 @@ No match
End
------------------------------------------------------------------
+/(A*)\E+/CBZ
+------------------------------------------------------------------
+ Bra
+ Callout 255 0 7
+ SCBra 1
+ Callout 255 1 2
+ A*
+ Callout 255 3 0
+ KetRmax
+ Callout 255 7 0
+ Ket
+ End
+------------------------------------------------------------------
+
/-- End of testinput2 --/
--
2.4.3
From 108377b836fc29a84f5286287629d96549b1c777 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
Date: Sun, 29 Nov 2015 17:38:25 +0000
Subject: [PATCH 3/5] Give error for regexec with pmatch=NULL and REG_STARTEND
set.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1614 2f5784b3-3f2a-0410-8824-cb99058d5e15
Petr Písař: Ported to 8.38.
diff --git a/pcreposix.c b/pcreposix.c
index f024423..dcc13ef 100644
--- a/pcreposix.c
+++ b/pcreposix.c
@@ -364,6 +364,7 @@ start location rather than being passed as a PCRE "starting offset". */
if ((eflags & REG_STARTEND) != 0)
{
+ if (pmatch == NULL) return REG_INVARG;
so = pmatch[0].rm_so;
eo = pmatch[0].rm_eo;
}
--
2.4.3
From e347b40d5bb12f7ef1e632aa649571a107be7d8a Mon Sep 17 00:00:00 2001
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
Date: Sun, 29 Nov 2015 17:46:23 +0000
Subject: [PATCH 4/5] Allow for up to 32-bit numbers in the ordin() function in
pcregrep.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1615 2f5784b3-3f2a-0410-8824-cb99058d5e15
Petr Písař: Ported to 8.38.
diff --git a/pcregrep.c b/pcregrep.c
index 64986b0..cd53c64 100644
--- a/pcregrep.c
+++ b/pcregrep.c
@@ -2437,7 +2437,7 @@ return options;
static char *
ordin(int n)
{
-static char buffer[8];
+static char buffer[14];
char *p = buffer;
sprintf(p, "%d", n);
while (*p != 0) p++;
--
2.4.3
From e78ad4264b16988b826bd2939a1781c1165a92d9 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
Date: Mon, 30 Nov 2015 17:44:45 +0000
Subject: [PATCH 5/5] Fix \Q\E before qualifier bug when auto callouts are
enabled.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1616 2f5784b3-3f2a-0410-8824-cb99058d5e15
Petr Písař: Ported to 8.38.
diff --git a/pcre_compile.c b/pcre_compile.c
index 5786cd3..beed46b 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -4671,17 +4671,27 @@ for (;; ptr++)
}
goto NORMAL_CHAR;
}
+
+ /* Check for the start of a \Q...\E sequence. We must do this here rather
+ than later in case it is immediately followed by \E, which turns it into a
+ "do nothing" sequence. */
+
+ if (c == CHAR_BACKSLASH && ptr[1] == CHAR_Q)
+ {
+ inescq = TRUE;
+ ptr++;
+ continue;
+ }
}
- /* In extended mode, skip white space and comments. We need a loop in order
- to check for more white space and more comments after a comment. */
+ /* In extended mode, skip white space and comments. */
if ((options & PCRE_EXTENDED) != 0)
{
- for (;;)
+ const pcre_uchar *wscptr = ptr;
+ while (MAX_255(c) && (cd->ctypes[c] & ctype_space) != 0) c = *(++ptr);
+ if (c == CHAR_NUMBER_SIGN)
{
- while (MAX_255(c) && (cd->ctypes[c] & ctype_space) != 0) c = *(++ptr);
- if (c != CHAR_NUMBER_SIGN) break;
ptr++;
while (*ptr != CHAR_NULL)
{
@@ -4695,7 +4705,15 @@ for (;; ptr++)
if (utf) FORWARDCHAR(ptr);
#endif
}
- c = *ptr; /* Either NULL or the char after a newline */
+ }
+
+ /* If we skipped any characters, restart the loop. Otherwise, we didn't see
+ a comment. */
+
+ if (ptr > wscptr)
+ {
+ ptr--;
+ continue;
}
}
@@ -7900,16 +7918,6 @@ for (;; ptr++)
c = ec;
else
{
- if (escape == ESC_Q) /* Handle start of quoted string */
- {
- if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
- ptr += 2; /* avoid empty string */
- else inescq = TRUE;
- continue;
- }
-
- if (escape == ESC_E) continue; /* Perl ignores an orphan \E */
-
/* For metasequences that actually match a character, we disable the
setting of a first character if it hasn't already been set. */
diff --git a/testdata/testinput2 b/testdata/testinput2
index e8ca4fe..3a1134f 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4227,4 +4227,6 @@ backtracking verbs. --/
/(A*)\E+/CBZ
+/()\Q\E*]/BCZ
+
/-- End of testinput2 --/
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 09756b8..ac33cc4 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -14624,4 +14624,19 @@ No match
End
------------------------------------------------------------------
+/()\Q\E*]/BCZ
+------------------------------------------------------------------
+ Bra
+ Callout 255 0 7
+ Brazero
+ SCBra 1
+ Callout 255 1 0
+ KetRmax
+ Callout 255 7 1
+ ]
+ Callout 255 8 0
+ Ket
+ End
+------------------------------------------------------------------
+
/-- End of testinput2 --/
--
2.4.3
From 46ed1a703b067e5b679eacf6500a54dae35f8130 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
Date: Thu, 3 Dec 2015 17:05:40 +0000
Subject: [PATCH] Fix /x bug when pattern starts with white space and (?-x)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1617 2f5784b3-3f2a-0410-8824-cb99058d5e15
Petr Písař: Ported to 8.38.
diff --git a/pcre_compile.c b/pcre_compile.c
index beed46b..57719b9 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -7607,39 +7607,15 @@ for (;; ptr++)
newoptions = (options | set) & (~unset);
/* If the options ended with ')' this is not the start of a nested
- group with option changes, so the options change at this level. If this
- item is right at the start of the pattern, the options can be
- abstracted and made external in the pre-compile phase, and ignored in
- the compile phase. This can be helpful when matching -- for instance in
- caseless checking of required bytes.
-
- If the code pointer is not (cd->start_code + 1 + LINK_SIZE), we are
- definitely *not* at the start of the pattern because something has been
- compiled. In the pre-compile phase, however, the code pointer can have
- that value after the start, because it gets reset as code is discarded
- during the pre-compile. However, this can happen only at top level - if
- we are within parentheses, the starting BRA will still be present. At
- any parenthesis level, the length value can be used to test if anything
- has been compiled at that level. Thus, a test for both these conditions
- is necessary to ensure we correctly detect the start of the pattern in
- both phases.
-
+ group with option changes, so the options change at this level.
If we are not at the pattern start, reset the greedy defaults and the
case value for firstchar and reqchar. */
if (*ptr == CHAR_RIGHT_PARENTHESIS)
{
- if (code == cd->start_code + 1 + LINK_SIZE &&
- (lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))
- {
- cd->external_options = newoptions;
- }
- else
- {
- greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);
- greedy_non_default = greedy_default ^ 1;
- req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS:0;
- }
+ greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);
+ greedy_non_default = greedy_default ^ 1;
+ req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS:0;
/* Change options at this level, and pass them back for use
in subsequent branches. */
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index ac33cc4..6c42897 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -419,7 +419,7 @@ Need char = '>'
/(?U)<.*>/I
Capturing subpattern count = 0
-Options: ungreedy
+No options
First char = '<'
Need char = '>'
abc<def>ghi<klm>nop
@@ -443,7 +443,7 @@ Need char = '='
/(?U)={3,}?/I
Capturing subpattern count = 0
-Options: ungreedy
+No options
First char = '='
Need char = '='
abc========def
@@ -477,7 +477,7 @@ Failed: lookbehind assertion is not fixed length at offset 12
/(?i)abc/I
Capturing subpattern count = 0
-Options: caseless
+No options
First char = 'a' (caseless)
Need char = 'c' (caseless)
@@ -489,7 +489,7 @@ No need char
/(?i)^1234/I
Capturing subpattern count = 0
-Options: anchored caseless
+Options: anchored
No first char
No need char
@@ -502,7 +502,7 @@ No need char
/(?s).*/I
Capturing subpattern count = 0
May match empty string
-Options: anchored dotall
+Options: anchored
No first char
No need char
@@ -516,7 +516,7 @@ Starting chars: a b c d
/(?i)[abcd]/IS
Capturing subpattern count = 0
-Options: caseless
+No options
No first char
No need char
Subject length lower bound = 1
@@ -524,7 +524,7 @@ Starting chars: A B C D a b c d
/(?m)[xy]|(b|c)/IS
Capturing subpattern count = 1
-Options: multiline
+No options
No first char
No need char
Subject length lower bound = 1
@@ -538,7 +538,7 @@ No need char
/(?i)(^a|^b)/Im
Capturing subpattern count = 1
-Options: caseless multiline
+Options: multiline
First char at start or follows newline
No need char
@@ -1179,7 +1179,7 @@ No need char
End
------------------------------------------------------------------
Capturing subpattern count = 1
-Options: anchored dotall
+Options: anchored
No first char
No need char
@@ -2735,7 +2735,7 @@ No match
End
------------------------------------------------------------------
Capturing subpattern count = 0
-Options: caseless extended
+Options: extended
First char = 'a' (caseless)
Need char = 'c' (caseless)
@@ -2748,7 +2748,7 @@ Need char = 'c' (caseless)
End
------------------------------------------------------------------
Capturing subpattern count = 0
-Options: caseless extended
+Options: extended
First char = 'a' (caseless)
Need char = 'c' (caseless)
@@ -3095,7 +3095,7 @@ Need char = 'b'
End
------------------------------------------------------------------
Capturing subpattern count = 0
-Options: ungreedy
+No options
First char = 'x'
Need char = 'b'
xaaaab
@@ -3497,7 +3497,7 @@ Need char = 'c'
/(?i)[ab]/IS
Capturing subpattern count = 0
-Options: caseless
+No options
No first char
No need char
Subject length lower bound = 1
@@ -6299,7 +6299,7 @@ Capturing subpattern count = 3
Named capturing subpatterns:
A 2
A 3
-Options: anchored dupnames
+Options: anchored
Duplicate name status changes
No first char
No need char
--
2.4.3
From db1fb68feddc9afe6f8822d099fa9ff25e3ea8e7 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
Date: Sat, 5 Dec 2015 16:30:14 +0000
Subject: [PATCH] Fix copy named substring bug.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1618 2f5784b3-3f2a-0410-8824-cb99058d5e15
Petr Písař: Ported to 8.38.
diff --git a/pcre_get.c b/pcre_get.c
index 8094b34..41eda9c 100644
--- a/pcre_get.c
+++ b/pcre_get.c
@@ -250,6 +250,7 @@ Arguments:
code the compiled regex
stringname the name of the capturing substring
ovector the vector of matched substrings
+ stringcount number of captured substrings
Returns: the number of the first that is set,
or the number of the last one if none are set,
@@ -258,13 +259,16 @@ Returns: the number of the first that is set,
#if defined COMPILE_PCRE8
static int
-get_first_set(const pcre *code, const char *stringname, int *ovector)
+get_first_set(const pcre *code, const char *stringname, int *ovector,
+ int stringcount)
#elif defined COMPILE_PCRE16
static int
-get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector)
+get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector,
+ int stringcount)
#elif defined COMPILE_PCRE32
static int
-get_first_set(const pcre32 *code, PCRE_SPTR32 stringname, int *ovector)
+get_first_set(const pcre32 *code, PCRE_SPTR32 stringname, int *ovector,
+ int stringcount)
#endif
{
const REAL_PCRE *re = (const REAL_PCRE *)code;
@@ -295,7 +299,7 @@ if (entrysize <= 0) return entrysize;
for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize)
{
int n = GET2(entry, 0);
- if (ovector[n*2] >= 0) return n;
+ if (n < stringcount && ovector[n*2] >= 0) return n;
}
return GET2(entry, 0);
}
@@ -402,7 +406,7 @@ pcre32_copy_named_substring(const pcre32 *code, PCRE_SPTR32 subject,
PCRE_UCHAR32 *buffer, int size)
#endif
{
-int n = get_first_set(code, stringname, ovector);
+int n = get_first_set(code, stringname, ovector, stringcount);
if (n <= 0) return n;
#if defined COMPILE_PCRE8
return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
@@ -619,7 +623,7 @@ pcre32_get_named_substring(const pcre32 *code, PCRE_SPTR32 subject,
PCRE_SPTR32 *stringptr)
#endif
{
-int n = get_first_set(code, stringname, ovector);
+int n = get_first_set(code, stringname, ovector, stringcount);
if (n <= 0) return n;
#if defined COMPILE_PCRE8
return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
diff --git a/testdata/testinput2 b/testdata/testinput2
index 3a1134f..00ffe32 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4229,4 +4229,7 @@ backtracking verbs. --/
/()\Q\E*]/BCZ
+/(?<A>)(?J:(?<B>)(?<B>))(?<C>)/
+ \O\CC
+
/-- End of testinput2 --/
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 6c42897..ffb4466 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -14639,4 +14639,9 @@ No match
End
------------------------------------------------------------------
+/(?<A>)(?J:(?<B>)(?<B>))(?<C>)/
+ \O\CC
+Matched, but too many substrings
+copy substring C failed -7
+
/-- End of testinput2 --/
--
2.4.3
From 40363ebc19baeab160abaaa55dc84322a89ac35a Mon Sep 17 00:00:00 2001
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
Date: Sat, 5 Dec 2015 16:58:46 +0000
Subject: [PATCH] Fix (by hacking) another length computation issue.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1619 2f5784b3-3f2a-0410-8824-cb99058d5e15
Petr Písař: Ported to 8.38.
diff --git a/pcre_compile.c b/pcre_compile.c
index 57719b9..087bf2a 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -7280,7 +7280,7 @@ for (;; ptr++)
issue is fixed "properly" in PCRE2. As PCRE1 is now in maintenance
only mode, we finesse the bug by allowing more memory always. */
- *lengthptr += 2 + 2*LINK_SIZE;
+ *lengthptr += 4 + 4*LINK_SIZE;
/* It is even worse than that. The current reference may be to an
existing named group with a different number (so apparently not
diff --git a/testdata/testoutput11-16 b/testdata/testoutput11-16
index 9a0a12d..280692e 100644
--- a/testdata/testoutput11-16
+++ b/testdata/testoutput11-16
@@ -231,7 +231,7 @@ Memory allocation (code space): 73
------------------------------------------------------------------
/(?P<a>a)...(?P=a)bbb(?P>a)d/BM
-Memory allocation (code space): 77
+Memory allocation (code space): 93
------------------------------------------------------------------
0 24 Bra
2 5 CBra 1
diff --git a/testdata/testoutput11-32 b/testdata/testoutput11-32
index 57e5da0..cdbda74 100644
--- a/testdata/testoutput11-32
+++ b/testdata/testoutput11-32
@@ -231,7 +231,7 @@ Memory allocation (code space): 155
------------------------------------------------------------------
/(?P<a>a)...(?P=a)bbb(?P>a)d/BM
-Memory allocation (code space): 157
+Memory allocation (code space): 189
------------------------------------------------------------------
0 24 Bra
2 5 CBra 1
diff --git a/testdata/testoutput11-8 b/testdata/testoutput11-8
index 748548a..cb37896 100644
--- a/testdata/testoutput11-8
+++ b/testdata/testoutput11-8
@@ -231,7 +231,7 @@ Memory allocation (code space): 45
------------------------------------------------------------------
/(?P<a>a)...(?P=a)bbb(?P>a)d/BM
-Memory allocation (code space): 50
+Memory allocation (code space): 62
------------------------------------------------------------------
0 30 Bra
3 7 CBra 1
--
2.4.3
From 4f47274a2eb10131d88145ad7fd0eed4027a0c51 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
Date: Tue, 8 Dec 2015 11:06:40 +0000
Subject: [PATCH] Fix get_substring_list() bug when \K is used in an assertion.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1620 2f5784b3-3f2a-0410-8824-cb99058d5e15
Petr Písař: ported to 8.38.
diff --git a/pcre_get.c b/pcre_get.c
index 41eda9c..cdd2abc 100644
--- a/pcre_get.c
+++ b/pcre_get.c
@@ -461,7 +461,10 @@ pcre_uchar **stringlist;
pcre_uchar *p;
for (i = 0; i < double_count; i += 2)
- size += sizeof(pcre_uchar *) + IN_UCHARS(ovector[i+1] - ovector[i] + 1);
+ {
+ size += sizeof(pcre_uchar *) + IN_UCHARS(1);
+ if (ovector[i+1] > ovector[i]) size += IN_UCHARS(ovector[i+1] - ovector[i]);
+ }
stringlist = (pcre_uchar **)(PUBL(malloc))(size);
if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
@@ -477,7 +480,7 @@ p = (pcre_uchar *)(stringlist + stringcount + 1);
for (i = 0; i < double_count; i += 2)
{
- int len = ovector[i+1] - ovector[i];
+ int len = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0;
memcpy(p, subject + ovector[i], IN_UCHARS(len));
*stringlist++ = p;
p += len;
diff --git a/testdata/testinput2 b/testdata/testinput2
index 00ffe32..967a241 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4232,4 +4232,7 @@ backtracking verbs. --/
/(?<A>)(?J:(?<B>)(?<B>))(?<C>)/
\O\CC
+/(?=a\K)/
+ ring bpattingbobnd $ 1,oern cou \rb\L
+
/-- End of testinput2 --/
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index ffb4466..5fb28d5 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -14644,4 +14644,10 @@ No match
Matched, but too many substrings
copy substring C failed -7
+/(?=a\K)/
+ ring bpattingbobnd $ 1,oern cou \rb\L
+Start of matched string is beyond its end - displaying from end to start.
+ 0: a
+ 0L
+
/-- End of testinput2 --/
--
2.5.0
From 3da5528b47b88c32224cf9d14d8a4e80cd7a0815 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
Date: Sat, 6 Feb 2016 16:54:14 +0000
Subject: [PATCH] Fix pcretest bad behaviour for callout in lookbehind.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1625 2f5784b3-3f2a-0410-8824-cb99058d5e15
Petr Písař: Ported to 8.38.
diff --git a/pcretest.c b/pcretest.c
index 488e419..63869fd 100644
--- a/pcretest.c
+++ b/pcretest.c
@@ -2250,7 +2250,7 @@ data is not zero. */
static int callout(pcre_callout_block *cb)
{
FILE *f = (first_callout | callout_extra)? outfile : NULL;
-int i, pre_start, post_start, subject_length;
+int i, current_position, pre_start, post_start, subject_length;
if (callout_extra)
{
@@ -2280,14 +2280,19 @@ printed lengths of the substrings. */
if (f != NULL) fprintf(f, "--->");
+/* If a lookbehind is involved, the current position may be earlier than the
+match start. If so, use the match start instead. */
+
+current_position = (cb->current_position >= cb->start_match)?
+ cb->current_position : cb->start_match;
+
PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
PCHARS(post_start, cb->subject, cb->start_match,
- cb->current_position - cb->start_match, f);
+ current_position - cb->start_match, f);
PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
-PCHARSV(cb->subject, cb->current_position,
- cb->subject_length - cb->current_position, f);
+PCHARSV(cb->subject, current_position, cb->subject_length - current_position, f);
if (f != NULL) fprintf(f, "\n");
@@ -5740,3 +5745,4 @@ return yield;
}
/* End of pcretest.c */
+
diff --git a/testdata/testinput2 b/testdata/testinput2
index 967a241..086e0f4 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4235,4 +4235,8 @@ backtracking verbs. --/
/(?=a\K)/
ring bpattingbobnd $ 1,oern cou \rb\L
+/(?<=((?C)0))/
+ 9010
+ abcd
+
/-- End of testinput2 --/
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 5fb28d5..d414a72 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -14650,4 +14650,19 @@ Start of matched string is beyond its end - displaying from end to start.
0: a
0L
+/(?<=((?C)0))/
+ 9010
+--->9010
+ 0 ^ 0
+ 0 ^ 0
+ 0:
+ 1: 0
+ abcd
+--->abcd
+ 0 ^ 0
+ 0 ^ 0
+ 0 ^ 0
+ 0 ^ 0
+No match
+
/-- End of testinput2 --/
--
2.5.0
From 943a5105b9fe2842851003f692c7077a6cdbeefe Mon Sep 17 00:00:00 2001
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
Date: Wed, 10 Feb 2016 19:13:17 +0000
Subject: [PATCH] Fix workspace overflow for (*ACCEPT) with deeply nested
parentheses.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1631 2f5784b3-3f2a-0410-8824-cb99058d5e15
Petr Písař: Ported to 8.38.
diff --git a/pcre_compile.c b/pcre_compile.c
index b9a239e..5019854 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2014 University of Cambridge
+ Copyright (c) 1997-2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -560,6 +560,7 @@ static const char error_texts[] =
/* 85 */
"parentheses are too deeply nested (stack check)\0"
"digits missing in \\x{} or \\o{}\0"
+ "regular expression is too complicated\0"
;
/* Table to identify digits and hex digits. This is used when compiling
@@ -4591,7 +4592,8 @@ for (;; ptr++)
if (code > cd->start_workspace + cd->workspace_size -
WORK_SIZE_SAFETY_MARGIN) /* Check for overrun */
{
- *errorcodeptr = ERR52;
+ *errorcodeptr = (code >= cd->start_workspace + cd->workspace_size)?
+ ERR52 : ERR87;
goto FAILED;
}
@@ -6626,8 +6628,21 @@ for (;; ptr++)
cd->had_accept = TRUE;
for (oc = cd->open_caps; oc != NULL; oc = oc->next)
{
- *code++ = OP_CLOSE;
- PUT2INC(code, 0, oc->number);
+ if (lengthptr != NULL)
+ {
+#ifdef COMPILE_PCRE8
+ *lengthptr += 1 + IMM2_SIZE;
+#elif defined COMPILE_PCRE16
+ *lengthptr += 2 + IMM2_SIZE;
+#elif defined COMPILE_PCRE32
+ *lengthptr += 4 + IMM2_SIZE;
+#endif
+ }
+ else
+ {
+ *code++ = OP_CLOSE;
+ PUT2INC(code, 0, oc->number);
+ }
}
setverb = *code++ =
(cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;
diff --git a/pcre_internal.h b/pcre_internal.h
index f7a5ee7..dbfe80e 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -7,7 +7,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2014 University of Cambridge
+ Copyright (c) 1997-2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -2289,7 +2289,7 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79,
- ERR80, ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERRCOUNT };
+ ERR80, ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERRCOUNT };
/* JIT compiling modes. The function list is indexed by them. */
diff --git a/pcreposix.c b/pcreposix.c
index dcc13ef..55b6ddc 100644
--- a/pcreposix.c
+++ b/pcreposix.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2014 University of Cambridge
+ Copyright (c) 1997-2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -173,7 +173,8 @@ static const int eint[] = {
REG_BADPAT, /* group name must start with a non-digit */
/* 85 */
REG_BADPAT, /* parentheses too deeply nested (stack check) */
- REG_BADPAT /* missing digits in \x{} or \o{} */
+ REG_BADPAT, /* missing digits in \x{} or \o{} */
+ REG_BADPAT /* pattern too complicated */
};
/* Table of texts corresponding to POSIX error codes */
diff --git a/testdata/testinput11 b/testdata/testinput11
index ac9d228..6f0989a 100644
--- a/testdata/testinput11
+++ b/testdata/testinput11
@@ -138,4 +138,6 @@ is required for these tests. --/
/.((?2)(?R)\1)()/B
+/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/
+
/-- End of testinput11 --/
diff --git a/testdata/testoutput11-16 b/testdata/testoutput11-16
index 280692e..3c485da 100644
--- a/testdata/testoutput11-16
+++ b/testdata/testoutput11-16
@@ -765,4 +765,7 @@ Memory allocation (code space): 14
25 End
------------------------------------------------------------------
+/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/
+Failed: regular expression is too complicated at offset 490
+
/-- End of testinput11 --/
diff --git a/testdata/testoutput11-32 b/testdata/testoutput11-32
index cdbda74..e19518d 100644
--- a/testdata/testoutput11-32
+++ b/testdata/testoutput11-32
@@ -765,4 +765,7 @@ Memory allocation (code space): 28
25 End
------------------------------------------------------------------
+/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/
+Failed: missing ) at offset 509
+
/-- End of testinput11 --/
diff --git a/testdata/testoutput11-8 b/testdata/testoutput11-8
index cb37896..5a4fbb2 100644
--- a/testdata/testoutput11-8
+++ b/testdata/testoutput11-8
@@ -765,4 +765,7 @@ Memory allocation (code space): 10
38 End
------------------------------------------------------------------
+/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/
+Failed: missing ) at offset 509
+
/-- End of testinput11 --/
--
2.5.0
From b7537308b7c758f33c347cb0bec62754c43c271f Mon Sep 17 00:00:00 2001
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
Date: Sat, 27 Feb 2016 17:38:11 +0000
Subject: [PATCH] Yet another duplicate name bugfix by overestimating the
memory needed (i.e. another hack - PCRE2 has this "properly" fixed).
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1636 2f5784b3-3f2a-0410-8824-cb99058d5e15
Petr Písař: Ported to 8.38.
diff --git a/pcre_compile.c b/pcre_compile.c
index 5019854..4ffea0c 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -7311,7 +7311,12 @@ for (;; ptr++)
so far in order to get the number. If the name is not found, leave
the value of recno as 0 for a forward reference. */
- else
+ /* This patch (removing "else") fixes a problem when a reference is
+ to multiple identically named nested groups from within the nest.
+ Once again, it is not the "proper" fix, and it results in an
+ over-allocation of memory. */
+
+ /* else */
{
ng = cd->named_groups;
for (i = 0; i < cd->names_found; i++, ng++)
diff --git a/testdata/testinput2 b/testdata/testinput2
index 086e0f4..c805f5f 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4239,4 +4239,6 @@ backtracking verbs. --/
9010
abcd
+/((?J)(?'R'(?'R'(?'R'(?'R'(?'R'(?|(\k'R'))))))))/
+
/-- End of testinput2 --/
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index d414a72..800a72f 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -14665,4 +14665,6 @@ Start of matched string is beyond its end - displaying from end to start.
0 ^ 0
No match
+/((?J)(?'R'(?'R'(?'R'(?'R'(?'R'(?|(\k'R'))))))))/
+
/-- End of testinput2 --/
--
2.5.0
From 0fc2edb79b3815c6511fd75c36a57893e4acaee6 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
Date: Sat, 27 Feb 2016 17:55:24 +0000
Subject: [PATCH] Fix pcretest loop for global matching with an ovector size
less than 2.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1637 2f5784b3-3f2a-0410-8824-cb99058d5e15
Petr Písař: Ported to 8.38.
diff --git a/pcretest.c b/pcretest.c
index 63869fd..78ef517 100644
--- a/pcretest.c
+++ b/pcretest.c
@@ -5617,6 +5617,12 @@ while (!done)
break;
}
+ if (use_size_offsets < 2)
+ {
+ fprintf(outfile, "Cannot do global matching with an ovector size < 2\n");
+ break;
+ }
+
/* If we have matched an empty string, first check to see if we are at
the end of the subject. If so, the /g loop is over. Otherwise, mimic what
Perl's /g options does. This turns out to be rather cunning. First we set
--
2.5.0
From b3db1b7de5cfaa026ec2bc4a393129461a0f5c57 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
Date: Sat, 27 Feb 2016 18:44:41 +0000
Subject: [PATCH] Fix non-diagnosis of missing assertion after (?(?C).
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1638 2f5784b3-3f2a-0410-8824-cb99058d5e15
Petr Písař: Ported to 8.38.
diff --git a/pcre_compile.c b/pcre_compile.c
index 4ffea0c..254c629 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -485,7 +485,7 @@ static const char error_texts[] =
"lookbehind assertion is not fixed length\0"
"malformed number or name after (?(\0"
"conditional group contains more than two branches\0"
- "assertion expected after (?(\0"
+ "assertion expected after (?( or (?(?C)\0"
"(?R or (?[+-]digits must be followed by )\0"
/* 30 */
"unknown POSIX class name\0"
@@ -6771,6 +6771,15 @@ for (;; ptr++)
for (i = 3;; i++) if (!IS_DIGIT(ptr[i])) break;
if (ptr[i] == CHAR_RIGHT_PARENTHESIS)
tempptr += i + 1;
+
+ /* tempptr should now be pointing to the opening parenthesis of the
+ assertion condition. */
+
+ if (*tempptr != CHAR_LEFT_PARENTHESIS)
+ {
+ *errorcodeptr = ERR28;
+ goto FAILED;
+ }
}
/* For conditions that are assertions, check the syntax, and then exit
diff --git a/testdata/testinput2 b/testdata/testinput2
index c805f5f..75e402e 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4241,4 +4241,6 @@ backtracking verbs. --/
/((?J)(?'R'(?'R'(?'R'(?'R'(?'R'(?|(\k'R'))))))))/
+/\N(?(?C)0?!.)*/
+
/-- End of testinput2 --/
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 800a72f..5e88d1a 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -555,13 +555,13 @@ Failed: malformed number or name after (?( at offset 4
Failed: malformed number or name after (?( at offset 4
/(?(?i))/
-Failed: assertion expected after (?( at offset 3
+Failed: assertion expected after (?( or (?(?C) at offset 3
/(?(abc))/
Failed: reference to non-existent subpattern at offset 7
/(?(?<ab))/
-Failed: assertion expected after (?( at offset 3
+Failed: assertion expected after (?( or (?(?C) at offset 3
/((?s)blah)\s+\1/I
Capturing subpattern count = 1
@@ -7870,7 +7870,7 @@ No match
Failed: malformed number or name after (?( at offset 6
/(?(''))/
-Failed: assertion expected after (?( at offset 4
+Failed: assertion expected after (?( or (?(?C) at offset 4
/(?('R')stuff)/
Failed: reference to non-existent subpattern at offset 7
@@ -14346,7 +14346,7 @@ No match
"((?2)+)((?1))"
"(?(?<E>.*!.*)?)"
-Failed: assertion expected after (?( at offset 3
+Failed: assertion expected after (?( or (?(?C) at offset 3
"X((?2)()*+){2}+"BZ
------------------------------------------------------------------
@@ -14667,4 +14667,7 @@ No match
/((?J)(?'R'(?'R'(?'R'(?'R'(?'R'(?|(\k'R'))))))))/
+/\N(?(?C)0?!.)*/
+Failed: assertion expected after (?( or (?(?C) at offset 4
+
/-- End of testinput2 --/
--
2.5.0