[pcre] Add pcre-8.35-Fix-empty-matching-possessive-zero-repeat-groups-bug.patch
Petr Pisar
ppisar at fedoraproject.org
Mon Jul 14 12:20:32 UTC 2014
commit 1860c03fdead52378512df6a9175f48a9d45f2f4
Author: Petr Písař <ppisar at redhat.com>
Date: Mon Jul 14 14:19:45 2014 +0200
Add pcre-8.35-Fix-empty-matching-possessive-zero-repeat-groups-bug.patch
Needed for:
commit 5d1fed8df94a0db02c12ce61c0c2ddc8c97e64d3
Author: Petr Písař <ppisar at redhat.com>
Date: Mon Jul 14 13:15:27 2014 +0200
Fix empty-matching possessive zero-repeat groups in interpreted mode
...atching-possessive-zero-repeat-groups-bug.patch | 192 ++++++++++++++++++++
1 files changed, 192 insertions(+), 0 deletions(-)
---
diff --git a/pcre-8.35-Fix-empty-matching-possessive-zero-repeat-groups-bug.patch b/pcre-8.35-Fix-empty-matching-possessive-zero-repeat-groups-bug.patch
new file mode 100644
index 0000000..2d82134
--- /dev/null
+++ b/pcre-8.35-Fix-empty-matching-possessive-zero-repeat-groups-bug.patch
@@ -0,0 +1,192 @@
+From 31152356367ef3cf3440c0431d2898f198e4dd18 Mon Sep 17 00:00:00 2001
+From: ph10 <ph10 at 2f5784b3-3f2a-0410-8824-cb99058d5e15>
+Date: Tue, 27 May 2014 13:18:31 +0000
+Subject: [PATCH] Fix empty-matching possessive zero-repeat groups bug.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1478 2f5784b3-3f2a-0410-8824-cb99058d5e15
+Signed-off-by: Petr Písař <ppisar at redhat.com>
+
+Petr Pisar: Ported to 8.35.
+
+Signed-off-by: Petr Písař <ppisar at redhat.com>
+---
+ pcre_exec.c | 43 +++++++++++++++++++++++++++----------------
+ testdata/testinput1 | 9 +++++++++
+ testdata/testinput8 | 6 ++++++
+ testdata/testoutput1 | 12 ++++++++++++
+ testdata/testoutput8 | 8 ++++++++
+ 5 files changed, 62 insertions(+), 16 deletions(-)
+
+diff --git a/pcre_exec.c b/pcre_exec.c
+index 5dec992..5a8dbad 100644
+--- a/pcre_exec.c
++++ b/pcre_exec.c
+@@ -1167,11 +1167,16 @@ for (;;)
+ if (rrc == MATCH_KETRPOS)
+ {
+ offset_top = md->end_offset_top;
+- eptr = md->end_match_ptr;
+ ecode = md->start_code + code_offset;
+ save_capture_last = md->capture_last;
+ matched_once = TRUE;
+ mstart = md->start_match_ptr; /* In case \K changed it */
++ if (eptr == md->end_match_ptr) /* Matched an empty string */
++ {
++ do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
++ break;
++ }
++ eptr = md->end_match_ptr;
+ continue;
+ }
+
+@@ -1241,10 +1246,15 @@ for (;;)
+ if (rrc == MATCH_KETRPOS)
+ {
+ offset_top = md->end_offset_top;
+- eptr = md->end_match_ptr;
+ ecode = md->start_code + code_offset;
+ matched_once = TRUE;
+ mstart = md->start_match_ptr; /* In case \K reset it */
++ if (eptr == md->end_match_ptr) /* Matched an empty string */
++ {
++ do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
++ break;
++ }
++ eptr = md->end_match_ptr;
+ continue;
+ }
+
+@@ -1894,7 +1904,7 @@ for (;;)
+ case OP_KETRMAX:
+ case OP_KETRPOS:
+ prev = ecode - GET(ecode, 1);
+-
++
+ /* If this was a group that remembered the subject start, in order to break
+ infinite repeats of empty string matches, retrieve the subject start from
+ the chain. Otherwise, set it NULL. */
+@@ -1919,7 +1929,7 @@ for (;;)
+ md->start_match_ptr = mstart;
+ RRETURN(MATCH_MATCH); /* Sets md->mark */
+ }
+-
++
+ /* For capturing groups we have to check the group number back at the start
+ and if necessary complete handling an extraction by setting the offsets and
+ bumping the high water mark. Whole-pattern recursion is coded as a recurse
+@@ -1979,6 +1989,19 @@ for (;;)
+ }
+ }
+
++ /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
++ and return the MATCH_KETRPOS. This makes it possible to do the repeats one
++ at a time from the outer level, thus saving stack. This must precede the
++ empty string test - in this case that test is done at the outer level. */
++
++ if (*ecode == OP_KETRPOS)
++ {
++ md->start_match_ptr = mstart; /* In case \K reset it */
++ md->end_match_ptr = eptr;
++ md->end_offset_top = offset_top;
++ RRETURN(MATCH_KETRPOS);
++ }
++
+ /* For an ordinary non-repeating ket, just continue at this level. This
+ also happens for a repeating ket if no characters were matched in the
+ group. This is the forcible breaking of infinite loops as implemented in
+@@ -2001,18 +2024,6 @@ for (;;)
+ break;
+ }
+
+- /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
+- and return the MATCH_KETRPOS. This makes it possible to do the repeats one
+- at a time from the outer level, thus saving stack. */
+-
+- if (*ecode == OP_KETRPOS)
+- {
+- md->start_match_ptr = mstart; /* In case \K reset it */
+- md->end_match_ptr = eptr;
+- md->end_offset_top = offset_top;
+- RRETURN(MATCH_KETRPOS);
+- }
+-
+ /* The normal repeating kets try the rest of the pattern or restart from
+ the preceding bracket, in the appropriate order. In the second case, we can
+ use tail recursion to avoid using another stack frame, unless we have an
+diff --git a/testdata/testinput1 b/testdata/testinput1
+index f933692..ffb9455 100644
+--- a/testdata/testinput1
++++ b/testdata/testinput1
+@@ -5675,4 +5675,13 @@ AbcdCBefgBhiBqz
+ /[\Q]a\E]+/
+ aa]]
+
++'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++'
++ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
++
++'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++'
++ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
++
++'\A(?:[^\"]++|\"(?:[^\"]++|\"\")++\")++'
++ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
++
+ /-- End of testinput1 --/
+diff --git a/testdata/testinput8 b/testdata/testinput8
+index bb2747b..06334cd 100644
+--- a/testdata/testinput8
++++ b/testdata/testinput8
+@@ -4831,4 +4831,10 @@
+ /[ab]{2,}?/
+ aaaa
+
++'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++'
++ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
++
++'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++'
++ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
++
+ /-- End of testinput8 --/
+diff --git a/testdata/testoutput1 b/testdata/testoutput1
+index 3d9a328..b2ae430 100644
+--- a/testdata/testoutput1
++++ b/testdata/testoutput1
+@@ -9325,4 +9325,16 @@ No match
+ aa]]
+ 0: aa]]
+
++'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++'
++ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
++ 0: NON QUOTED "QUOT""ED" AFTER
++
++'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++'
++ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
++ 0: NON QUOTED "QUOT""ED" AFTER
++
++'\A(?:[^\"]++|\"(?:[^\"]++|\"\")++\")++'
++ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
++ 0: NON QUOTED "QUOT""ED" AFTER
++
+ /-- End of testinput1 --/
+diff --git a/testdata/testoutput8 b/testdata/testoutput8
+index 3861ea4..95c4e4d 100644
+--- a/testdata/testoutput8
++++ b/testdata/testoutput8
+@@ -7777,4 +7777,12 @@ Matched, but offsets vector is too small to show all matches
+ 1: aaa
+ 2: aa
+
++'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++'
++ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
++ 0: NON QUOTED "QUOT""ED" AFTER
++
++'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++'
++ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
++ 0: NON QUOTED "QUOT""ED" AFTER
++
+ /-- End of testinput8 --/
+--
+1.9.3
+
More information about the scm-commits mailing list