[pcre/f19] Fix empty-matching possessive zero-repeat groups in interpreted mode

Mon Jul 14 15:16:06 UTC 2014

commit eb3ae4ea7538784cf0d988b69057bf7cc8a91514
Author: Petr Písař <ppisar at redhat.com>
Date:   Mon Jul 14 13:15:27 2014 +0200

    Fix empty-matching possessive zero-repeat groups in interpreted mode

 ...atching-possessive-zero-repeat-groups-bug.patch |  187 ++++++++++++++++++++
 pcre.spec                                          |   10 +-
 2 files changed, 196 insertions(+), 1 deletions(-)
---

diff --git a/pcre-8.33-Fix-empty-matching-possessive-zero-repeat-groups-bug.patch b/pcre-8.33-Fix-empty-matching-possessive-zero-repeat-groups-bug.patch
new file mode 100644
index 0000000..a2db8d0
--- /dev/null
+++ b/pcre-8.33-Fix-empty-matching-possessive-zero-repeat-groups-bug.patch
@@ -0,0 +1,187 @@
+From 93c413c5fac105d90f77ab5d03e31e0f64fc6142 Mon Sep 17 00:00:00 2001
+From: ph10 <ph10 at 2f5784b3-3f2a-0410-8824-cb99058d5e15>
+Date: Tue, 27 May 2014 13:18:31 +0000
+Subject: [PATCH] Fix empty-matching possessive zero-repeat groups bug.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1478 2f5784b3-3f2a-0410-8824-cb99058d5e15
+
+Petr Pisar: Ported to 8.33.
+
+Signed-off-by: Petr Písař <ppisar at redhat.com>
+---
+ pcre_exec.c          | 41 ++++++++++++++++++++++++++---------------
+ testdata/testinput1  |  9 +++++++++
+ testdata/testinput8  |  6 ++++++
+ testdata/testoutput1 | 12 ++++++++++++
+ testdata/testoutput8 |  8 ++++++++
+ 5 files changed, 61 insertions(+), 15 deletions(-)
+
+diff --git a/pcre_exec.c b/pcre_exec.c
+index ab76d02..481e899 100644
+--- a/pcre_exec.c
++++ b/pcre_exec.c
+@@ -1169,10 +1169,15 @@ for (;;)
+         if (rrc == MATCH_KETRPOS)
+           {
+           offset_top = md->end_offset_top;
+-          eptr = md->end_match_ptr;
+           ecode = md->start_code + code_offset;
+           save_capture_last = md->capture_last;
+           matched_once = TRUE;
++          if (eptr == md->end_match_ptr)   /* Matched an empty string */
++            {
++            do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
++            break;
++            }  
++          eptr = md->end_match_ptr;
+           continue;
+           }
+ 
+@@ -1242,9 +1247,14 @@ for (;;)
+       if (rrc == MATCH_KETRPOS)
+         {
+         offset_top = md->end_offset_top;
+-        eptr = md->end_match_ptr;
+         ecode = md->start_code + code_offset;
+         matched_once = TRUE;
++        if (eptr == md->end_match_ptr)  /* Matched an empty string */
++          {
++          do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
++          break;
++          }  
++        eptr = md->end_match_ptr;
+         continue;
+         }
+ 
+@@ -1976,7 +1986,7 @@ for (;;)
+     case OP_KETRMAX:
+     case OP_KETRPOS:
+     prev = ecode - GET(ecode, 1);
+-
++    
+     /* If this was a group that remembered the subject start, in order to break
+     infinite repeats of empty string matches, retrieve the subject start from
+     the chain. Otherwise, set it NULL. */
+@@ -2001,7 +2011,7 @@ for (;;)
+       md->start_match_ptr = mstart;
+       RRETURN(MATCH_MATCH);         /* Sets md->mark */
+       }
+-
++      
+     /* For capturing groups we have to check the group number back at the start
+     and if necessary complete handling an extraction by setting the offsets and
+     bumping the high water mark. Whole-pattern recursion is coded as a recurse
+@@ -2061,6 +2071,18 @@ for (;;)
+         }
+       }
+ 
++    /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
++    and return the MATCH_KETRPOS. This makes it possible to do the repeats one
++    at a time from the outer level, thus saving stack. This must precede the 
++    empty string test - in this case that test is done at the outer level. */
++
++    if (*ecode == OP_KETRPOS)
++      {
++      md->end_match_ptr = eptr;
++      md->end_offset_top = offset_top;
++      RRETURN(MATCH_KETRPOS);
++      }
++
+     /* For an ordinary non-repeating ket, just continue at this level. This
+     also happens for a repeating ket if no characters were matched in the
+     group. This is the forcible breaking of infinite loops as implemented in
+@@ -2083,17 +2105,6 @@ for (;;)
+       break;
+       }
+ 
+-    /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
+-    and return the MATCH_KETRPOS. This makes it possible to do the repeats one
+-    at a time from the outer level, thus saving stack. */
+-
+-    if (*ecode == OP_KETRPOS)
+-      {
+-      md->end_match_ptr = eptr;
+-      md->end_offset_top = offset_top;
+-      RRETURN(MATCH_KETRPOS);
+-      }
+-
+     /* The normal repeating kets try the rest of the pattern or restart from
+     the preceding bracket, in the appropriate order. In the second case, we can
+     use tail recursion to avoid using another stack frame, unless we have an
+diff --git a/testdata/testinput1 b/testdata/testinput1
+index d77d8ac..6bde9ec 100644
+--- a/testdata/testinput1
++++ b/testdata/testinput1
+@@ -5614,4 +5614,13 @@ AbcdCBefgBhiBqz
+ /[\Q]a\E]+/
+     aa]]
+ 
++'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++'
++    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
++
++'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++'
++    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
++
++'\A(?:[^\"]++|\"(?:[^\"]++|\"\")++\")++'
++    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
++
+ /-- End of testinput1 --/
+diff --git a/testdata/testinput8 b/testdata/testinput8
+index d91013b..98a0b38 100644
+--- a/testdata/testinput8
++++ b/testdata/testinput8
+@@ -4801,4 +4801,10 @@
+ /abcd/
+     abcd\O0
+ 
++'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++'
++    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
++
++'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++'
++    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
++
+ /-- End of testinput8 --/
+diff --git a/testdata/testoutput1 b/testdata/testoutput1
+index 1b0b8dc..cb9592d 100644
+--- a/testdata/testoutput1
++++ b/testdata/testoutput1
+@@ -9208,4 +9208,16 @@ No match
+     aa]]
+  0: aa]]
+ 
++'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++'
++    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
++ 0: NON QUOTED "QUOT""ED" AFTER 
++
++'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++'
++    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
++ 0: NON QUOTED "QUOT""ED" AFTER 
++
++'\A(?:[^\"]++|\"(?:[^\"]++|\"\")++\")++'
++    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
++ 0: NON QUOTED "QUOT""ED" AFTER 
++
+ /-- End of testinput1 --/
+diff --git a/testdata/testoutput8 b/testdata/testoutput8
+index 75affbe..666b67e 100644
+--- a/testdata/testoutput8
++++ b/testdata/testoutput8
+@@ -8020,4 +8020,12 @@ Error -30 (invalid data in workspace for DFA restart)
+     abcd\O0
+ Matched, but offsets vector is too small to show all matches
+ 
++'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++'
++    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
++ 0: NON QUOTED "QUOT""ED" AFTER 
++
++'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++'
++    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
++ 0: NON QUOTED "QUOT""ED" AFTER 
++
+ /-- End of testinput8 --/
+-- 
+1.9.3
+
diff --git a/pcre.spec b/pcre.spec
index 7f2777f..5a0124e 100644
--- a/pcre.spec
+++ b/pcre.spec
@@ -2,7 +2,7 @@
 #%%global rcversion RC1
 Name: pcre
 Version: 8.32
-Release: %{?rcversion:0.}9%{?rcversion:.%rcversion}%{?dist}
+Release: %{?rcversion:0.}10%{?rcversion:.%rcversion}%{?dist}
 %global myversion %{version}%{?rcversion:-%rcversion}
 Summary: Perl-compatible regular expression library
 Group: System Environment/Libraries
@@ -33,6 +33,9 @@ Patch8: pcre-8.32-Fix-bad-starting-data-when-char-with-more-than-one-o.patch
 # Fix character class with a literal quotation, bug #1111054,
 # upstream bug #1494, in upstream after 8.35
 Patch9: pcre-8.32-Fix-bad-compile-of-Qx-.-where-x-is-any-character.patch
+# Fix empty-matching possessive zero-repeat groups in interpreted mode,
+# bug #1119241, upstream bug #1500, in upstream after 8.35
+Patch10: pcre-8.33-Fix-empty-matching-possessive-zero-repeat-groups-bug.patch
 BuildRequires: readline-devel
 # New libtool to get rid of rpath
 BuildRequires: autoconf, automake, libtool
@@ -82,6 +85,7 @@ Utilities demonstrating PCRE capabilities like pcregrep or pcretest.
 %patch7 -p1 -b .jitted_range_check
 %patch8 -p1 -b .starting_data
 %patch9 -p1 -b .class_with_literal
+%patch10 -p1 -b .empty_zero_repeat_group
 # Because of rpath patch
 libtoolize --copy --force && autoreconf -vif
 # One contributor's name is non-UTF-8
@@ -145,6 +149,10 @@ make check
 %{_mandir}/man1/pcretest.*
 
 %changelog
+* Mon Jul 14 2014 Petr Pisar <ppisar at redhat.com> - 8.32-10
+- Fix empty-matching possessive zero-repeat groups in interpreted mode
+  (bug #1119241)
+
 * Thu Jun 19 2014 Petr Pisar <ppisar at redhat.com> - 8.32-9
 - Fix bad starting data when char with more than one other case follows
   circumflex in multiline UTF mode (bug #1110620)