[pcre] Reset non-matched groups within capturing group up to forced match

Petr Pisar ppisar at fedoraproject.org
Fri Nov 7 12:01:49 UTC 2014


commit ed53e29bbfb7ae3c48148d0d6bf4f903692337b7
Author: Petr Písař <ppisar at redhat.com>
Date:   Fri Nov 7 13:00:15 2014 +0100

    Reset non-matched groups within capturing group up to forced match

 ...n-there-are-unset-groups-prior-to-ACCEPT-.patch |   98 ++++++++++++++++++++
 pcre.spec                                          |   10 ++-
 2 files changed, 107 insertions(+), 1 deletions(-)
---
diff --git a/pcre-8.36-Fix-bug-when-there-are-unset-groups-prior-to-ACCEPT-.patch b/pcre-8.36-Fix-bug-when-there-are-unset-groups-prior-to-ACCEPT-.patch
new file mode 100644
index 0000000..fd5cc6c
--- /dev/null
+++ b/pcre-8.36-Fix-bug-when-there-are-unset-groups-prior-to-ACCEPT-.patch
@@ -0,0 +1,98 @@
+From e2eeaf85f1b5d6c4669b621d309ff904cbf96f4b Mon Sep 17 00:00:00 2001
+From: ph10 <ph10 at 2f5784b3-3f2a-0410-8824-cb99058d5e15>
+Date: Wed, 5 Nov 2014 15:08:03 +0000
+Subject: [PATCH] Fix bug when there are unset groups prior to (*ACCEPT) within
+ a capturing group.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1510 2f5784b3-3f2a-0410-8824-cb99058d5e15
+Signed-off-by: Petr Písař <ppisar at redhat.com>
+---
+ ChangeLog            | 12 ++++++++++++
+ pcre_exec.c          | 13 ++++++++++++-
+ testdata/testinput1  |  3 +++
+ testdata/testoutput1 |  9 +++++++++
+ 4 files changed, 36 insertions(+), 1 deletion(-)
+
+diff --git a/ChangeLog b/ChangeLog
+index 8abdfb5..06da1c4 100644
+--- a/ChangeLog
++++ b/ChangeLog
+@@ -1,6 +1,18 @@
+ ChangeLog for PCRE
+ ------------------
+ 
++Version 8.37 xx-xxx-201x
++------------------------
++
++1.  When an (*ACCEPT) is triggered inside capturing parentheses, it arranges 
++    for those parentheses to be closed with whatever has been captured so far. 
++    However, it was failing to mark any other groups between the highest 
++    capture so far and the current group as "unset". Thus, the ovector for 
++    those groups contained whatever was previously there. An example is the 
++    pattern /(x)|((*ACCEPT))/ when matched against "abcd".
++
++
++
+ Version 8.36 26-September-2014
+ ------------------------------
+ 
+diff --git a/pcre_exec.c b/pcre_exec.c
+index 654eb9e..fdf7067 100644
+--- a/pcre_exec.c
++++ b/pcre_exec.c
+@@ -1474,7 +1474,18 @@ for (;;)
+       md->offset_vector[offset] =
+         md->offset_vector[md->offset_end - number];
+       md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
+-      if (offset_top <= offset) offset_top = offset + 2;
++
+      /* If this group is at or above the current high water mark, ensure that
++      any groups between the current high water mark and this group are marked
++      unset and then update the high water mark. */
++
++      if (offset >= offset_top)
++        {
++        register int *iptr = md->offset_vector + offset_top;
++        register int *iend = md->offset_vector + offset;
++        while (iptr < iend) *iptr++ = -1;
++        offset_top = offset + 2;
++        }
+       }
+     ecode += 1 + IMM2_SIZE;
+     break;
+diff --git a/testdata/testinput1 b/testdata/testinput1
+index 123e3d3..091e307 100644
+--- a/testdata/testinput1
++++ b/testdata/testinput1
+@@ -5720,4 +5720,7 @@ AbcdCBefgBhiBqz
+ /[\Q]a\E]+/
+     aa]]
+ 
++/(?:((abcd))|(((?:(?:(?:(?:abc|(?:abcdef))))b)abcdefghi)abc)|((*ACCEPT)))/
++    1234abcd
++
+ /-- End of testinput1 --/
+diff --git a/testdata/testoutput1 b/testdata/testoutput1
+index 5e71900..ba7ca37 100644
+--- a/testdata/testoutput1
++++ b/testdata/testoutput1
+@@ -9411,4 +9411,13 @@ No match
+     aa]]
+  0: aa]]
+ 
++/(?:((abcd))|(((?:(?:(?:(?:abc|(?:abcdef))))b)abcdefghi)abc)|((*ACCEPT)))/
++    1234abcd
++ 0: 
++ 1: <unset>
++ 2: <unset>
++ 3: <unset>
++ 4: <unset>
++ 5: 
++
+ /-- End of testinput1 --/
+-- 
+1.9.3
+
diff --git a/pcre.spec b/pcre.spec
index a9c6872..ff16b9a 100644
--- a/pcre.spec
+++ b/pcre.spec
@@ -2,7 +2,7 @@
 #%%global rcversion RC1
 Name: pcre
 Version: 8.36
-Release: %{?rcversion:0.}1%{?rcversion:.%rcversion}%{?dist}
+Release: %{?rcversion:0.}2%{?rcversion:.%rcversion}%{?dist}
 %global myversion %{version}%{?rcversion:-%rcversion}
 Summary: Perl-compatible regular expression library
 Group: System Environment/Libraries
@@ -13,6 +13,9 @@ Source: ftp://ftp.csx.cam.ac.uk/pub/software/programming/%{name}/%{?rcversion:Te
 Patch0: pcre-8.21-multilib.patch
 # Refused by upstream, bug #675477
 Patch1: pcre-8.32-refused_spelling_terminated.patch
+# Reset non-matched groups within capturing group up to forced match,
+# bug #1161587, in upstream after 8.36
+Patch2: pcre-8.36-Fix-bug-when-there-are-unset-groups-prior-to-ACCEPT-.patch
 BuildRequires: readline-devel
 # New libtool to get rid of rpath
 BuildRequires: autoconf, automake, libtool
@@ -54,6 +57,7 @@ Utilities demonstrating PCRE capabilities like pcregrep or pcretest.
 # Get rid of rpath
 %patch0 -p1 -b .multilib
 %patch1 -p1 -b .terminated_typos
+%patch2 -p1 -b .reset_groups
 # Because of rpath patch
 libtoolize --copy --force && autoreconf -vif
 # One contributor's name is non-UTF-8
@@ -124,6 +128,10 @@ make %{?_smp_mflags} check VERBOSE=yes
 %{_mandir}/man1/pcretest.*
 
 %changelog
+* Fri Nov 07 2014 Petr Pisar <ppisar at redhat.com> - 8.36-2
+- Reset non-matched groups within capturing group up to forced match
+  (bug #1161587)
+
 * Tue Oct 07 2014 Petr Pisar <ppisar at redhat.com> - 8.36-1
 - 8.36 bump
 


More information about the scm-commits mailing list