ppisar pushed to pcre (f20). "Fix compilation of mutual recursion inside a lookbehind assertion"

notifications at fedoraproject.org notifications at fedoraproject.org
Fri Apr 10 13:47:18 UTC 2015


>From 1e0eac5695c97e061a7c1a97fb0b8e8f87c06348 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= <ppisar at redhat.com>
Date: Thu, 9 Apr 2015 18:29:37 +0200
Subject: Fix compilation of mutual recursion inside a lookbehind assertion


diff --git a/pcre-8.33-Fix-a-bug-concerned-with-scanning-for-empty-string-m.patch b/pcre-8.33-Fix-a-bug-concerned-with-scanning-for-empty-string-m.patch
new file mode 100644
index 0000000..c5478f8
--- /dev/null
+++ b/pcre-8.33-Fix-a-bug-concerned-with-scanning-for-empty-string-m.patch
@@ -0,0 +1,160 @@
+From 03efd00a6f2e015228d88a36c8d1de05e97e43e9 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= <ppisar at redhat.com>
+Date: Fri, 10 Apr 2015 14:23:51 +0200
+Subject: [PATCH] Fix a bug concerned with scanning for empty string matching.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This is part of upstream commit ported to 8.33:
+
+commit 74d96caf6251eff2f6c6a3e879268ce2d2a6c9be
+Author: ph10 <ph10 at 2f5784b3-3f2a-0410-8824-cb99058d5e15>
+Date:   Fri Jul 5 10:38:37 2013 +0000
+
+    Implement PCRE_INFO_MATCH_EMPTY and fix 2 bugs concerned with scanning for
+    empty string matching.
+
+    git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1348 2f5784b3-3f2a-0410-8824-
+cb99058d5e15
+
+Signed-off-by: Petr Písař <ppisar at redhat.com>
+---
+ pcre_compile.c | 67 ++++++++++++++++++++++++++++++++++++++++++++--------------
+ 1 file changed, 51 insertions(+), 16 deletions(-)
+
+diff --git a/pcre_compile.c b/pcre_compile.c
+index 0b7d4cf..7e4c7e8 100644
+--- a/pcre_compile.c
++++ b/pcre_compile.c
+@@ -2353,15 +2353,23 @@ Arguments:
+   endcode     points to where to stop
+   utf         TRUE if in UTF-8 / UTF-16 / UTF-32 mode
+   cd          contains pointers to tables etc.
++  recurses    chain of recurse_check to catch mutual recursion
+ 
+ Returns:      TRUE if what is matched could be empty
+ */
+ 
++typedef struct recurse_check {
++  struct recurse_check *prev;
++  const pcre_uchar *group;
++} recurse_check;    
++
+ static BOOL
+ could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,
+-  BOOL utf, compile_data *cd)
++  BOOL utf, compile_data *cd, recurse_check *recurses)
+ {
+ register pcre_uchar c;
++recurse_check this_recurse;
++
+ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
+      code < endcode;
+      code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE))
+@@ -2389,25 +2397,50 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
+ 
+   if (c == OP_RECURSE)
+     {
+-    const pcre_uchar *scode;
++    const pcre_uchar *scode = cd->start_code + GET(code, 1);
+     BOOL empty_branch;
+ 
+-    /* Test for forward reference */
++    /* Test for forward reference or uncompleted reference. This is disabled
++    when called to scan a completed pattern by setting cd->start_workspace to
++    NULL. */
+ 
+-    for (scode = cd->start_workspace; scode < cd->hwm; scode += LINK_SIZE)
+-      if ((int)GET(scode, 0) == (int)(code + 1 - cd->start_code)) return TRUE;
++    if (cd->start_workspace != NULL)
++      { 
++      const pcre_uchar *tcode; 
++      for (tcode = cd->start_workspace; tcode < cd->hwm; tcode += LINK_SIZE)
++        if ((int)GET(tcode, 0) == (int)(code + 1 - cd->start_code)) return TRUE;
++      if (GET(scode, 1) == 0) return TRUE;    /* Unclosed */
++      }
++    
++    /* If we are scanning a completed pattern, there are no forward references 
++    and all groups are complete. We need to detect whether this is a recursive 
++    call, as otherwise there will be an infinite loop. If it is a recursion,
++    just skip over it. Simple recursions are easily detected. For mutual 
++    recursions we keep a chain on the stack. */ 
++     
++    else
++      {  
++      recurse_check *r = recurses;
++      const pcre_uchar *endgroup = scode;
++       
++      do endgroup += GET(endgroup, 1); while (*endgroup == OP_ALT);
++      if (code >= scode && code <= endgroup) continue;  /* Simple recursion */
++      
++      for (r = recurses; r != NULL; r = r->prev)
++        if (r->group == scode) break;
++      if (r != NULL) continue;   /* Mutual recursion */
++      }
+ 
+-    /* Not a forward reference, test for completed backward reference */
++    /* Completed reference; scan the referenced group, remembering it on the
++    stack chain to detect mutual recursions. */
+ 
+     empty_branch = FALSE;
+-    scode = cd->start_code + GET(code, 1);
+-    if (GET(scode, 1) == 0) return TRUE;    /* Unclosed */
+-
+-    /* Completed backwards reference */
+-
++    this_recurse.prev = recurses;
++    this_recurse.group = scode; 
++     
+     do
+       {
+-      if (could_be_empty_branch(scode, endcode, utf, cd))
++      if (could_be_empty_branch(scode, endcode, utf, cd, &this_recurse))
+         {
+         empty_branch = TRUE;
+         break;
+@@ -2463,7 +2496,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
+       empty_branch = FALSE;
+       do
+         {
+-        if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd))
++        if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd, NULL))
+           empty_branch = TRUE;
+         code += GET(code, 1);
+         }
+@@ -2659,7 +2692,7 @@ could_be_empty(const pcre_uchar *code, const pcre_uchar *endcode,
+ {
+ while (bcptr != NULL && bcptr->current_branch >= code)
+   {
+-  if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd))
++  if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd, NULL))
+     return FALSE;
+   bcptr = bcptr->outer;
+   }
+@@ -5398,7 +5431,7 @@ for (;; ptr++)
+             pcre_uchar *scode = bracode;
+             do
+               {
+-              if (could_be_empty_branch(scode, ketcode, utf, cd))
++              if (could_be_empty_branch(scode, ketcode, utf, cd, NULL))
+                 {
+                 *bracode += OP_SBRA - OP_BRA;
+                 break;
+@@ -8231,10 +8264,12 @@ if (cd->hwm > cd->start_workspace)
+     }
+   }
+ 
+-/* If the workspace had to be expanded, free the new memory. */
++/* If the workspace had to be expanded, free the new memory. Set the pointer to 
++NULL to indicate that forward references have been filled in. */
+ 
+ if (cd->workspace_size > COMPILE_WORK_SIZE)
+   (PUBL(free))((void *)cd->start_workspace);
++cd->start_workspace = NULL;   
+ 
+ /* Give an error if there's back reference to a non-existent capturing
+ subpattern. */
+-- 
+2.1.0
+
diff --git a/pcre-8.33-Fix-stack-overflow-instead-of-diagnostic-for-mutual-.patch b/pcre-8.33-Fix-stack-overflow-instead-of-diagnostic-for-mutual-.patch
new file mode 100644
index 0000000..0f8f6d1
--- /dev/null
+++ b/pcre-8.33-Fix-stack-overflow-instead-of-diagnostic-for-mutual-.patch
@@ -0,0 +1,146 @@
+From 519375fc19700ada5543e1f3eeaece6a25989e89 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= <ppisar at redhat.com>
+Date: Thu, 9 Apr 2015 18:35:04 +0200
+Subject: [PATCH] Fix stack overflow instead of diagnostic for mutual recursion
+ inside a lookbehind assertion.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Upstream commit ported to 8.33:
+
+commit 256d94987eecd7eb87b37e1c981a4e753ed8ab7a
+Author: ph10 <ph10 at 2f5784b3-3f2a-0410-8824-cb99058d5e15>
+Date:   Wed Apr 1 15:43:53 2015 +0000
+
+    Fix stack overflow instead of diagnostic for mutual recursion inside a
+    lookbehind assertion.
+
+    git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1542 2f5784b3-3f2a-0410-8824-cb99058d5e15
+
+Signed-off-by: Petr Písař <ppisar at redhat.com>
+
+diff --git a/pcre_compile.c b/pcre_compile.c
+index 7e4c7e8..a6e6f2d 100644
+--- a/pcre_compile.c
++++ b/pcre_compile.c
+@@ -648,6 +648,14 @@ static const pcre_uint8 ebcdic_chartab[] = { /* chartable partial dup */
+ #endif
+ 
+ 
++/* Structure for mutual recursion detection. */
++
++typedef struct recurse_check {
++  struct recurse_check *prev;
++  const pcre_uchar *group;
++} recurse_check;
++
++
+ 
+ /*************************************************
+ *            Find an error text                  *
+@@ -1733,6 +1741,7 @@ Arguments:
+   utf      TRUE in UTF-8 / UTF-16 / UTF-32 mode
+   atend    TRUE if called when the pattern is complete
+   cd       the "compile data" structure
++  recurses    chain of recurse_check to catch mutual recursion
+ 
+ Returns:   the fixed length,
+              or -1 if there is no fixed length,
+@@ -1742,10 +1751,11 @@ Returns:   the fixed length,
+ */
+ 
+ static int
+-find_fixedlength(pcre_uchar *code, BOOL utf, BOOL atend, compile_data *cd)
++find_fixedlength(pcre_uchar *code, BOOL utf, BOOL atend, compile_data *cd,
++  recurse_check *recurses)
+ {
+ int length = -1;
+-
++recurse_check this_recurse;
+ register int branchlength = 0;
+ register pcre_uchar *cc = code + 1 + LINK_SIZE;
+ 
+@@ -1770,7 +1780,8 @@ for (;;)
+     case OP_ONCE:
+     case OP_ONCE_NC:
+     case OP_COND:
+-    d = find_fixedlength(cc + ((op == OP_CBRA)? IMM2_SIZE : 0), utf, atend, cd);
++    d = find_fixedlength(cc + ((op == OP_CBRA)? IMM2_SIZE : 0), utf, atend, cd,
++      recurses);
+     if (d < 0) return d;
+     branchlength += d;
+     do cc += GET(cc, 1); while (*cc == OP_ALT);
+@@ -1804,7 +1815,15 @@ for (;;)
+     cs = ce = (pcre_uchar *)cd->start_code + GET(cc, 1);  /* Start subpattern */
+     do ce += GET(ce, 1); while (*ce == OP_ALT);           /* End subpattern */
+     if (cc > cs && cc < ce) return -1;                    /* Recursion */
+-    d = find_fixedlength(cs + IMM2_SIZE, utf, atend, cd);
++    else   /* Check for mutual recursion */
++      {
++      recurse_check *r = recurses;
++      for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
++      if (r != NULL) return -1;   /* Mutual recursion */
++      }
++    this_recurse.prev = recurses;
++    this_recurse.group = cs;
++    d = find_fixedlength(cs + IMM2_SIZE, utf, atend, cd, &this_recurse);
+     if (d < 0) return d;
+     branchlength += d;
+     cc += 1 + LINK_SIZE;
+@@ -2358,11 +2377,6 @@ Arguments:
+ Returns:      TRUE if what is matched could be empty
+ */
+ 
+-typedef struct recurse_check {
+-  struct recurse_check *prev;
+-  const pcre_uchar *group;
+-} recurse_check;    
+-
+ static BOOL
+ could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,
+   BOOL utf, compile_data *cd, recurse_check *recurses)
+@@ -7301,7 +7315,7 @@ for (;;)
+       int fixed_length;
+       *code = OP_END;
+       fixed_length = find_fixedlength(last_branch,  (options & PCRE_UTF8) != 0,
+-        FALSE, cd);
++        FALSE, cd, NULL);
+       DPRINTF(("fixed length = %d\n", fixed_length));
+       if (fixed_length == -3)
+         {
+@@ -8304,7 +8318,7 @@ if (cd->check_lookbehind)
+       int end_op = *be;
+       *be = OP_END;
+       fixed_length = find_fixedlength(cc, (re->options & PCRE_UTF8) != 0, TRUE,
+-        cd);
++        cd, NULL);
+       *be = end_op;
+       DPRINTF(("fixed length = %d\n", fixed_length));
+       if (fixed_length < 0)
+diff --git a/testdata/testinput2 b/testdata/testinput2
+index aba73af..b23fe6b 100644
+--- a/testdata/testinput2
++++ b/testdata/testinput2
+@@ -3845,4 +3845,6 @@ backtracking verbs. --/
+ "(?(?=)?==)(((((((((?=)))))))))"
+     a
+ 
++"(?<=((?2))((?1)))"
++
+ /-- End of testinput2 --/
+diff --git a/testdata/testoutput2 b/testdata/testoutput2
+index 0dcc5f9..028b100 100644
+--- a/testdata/testoutput2
++++ b/testdata/testoutput2
+@@ -12699,4 +12699,7 @@ Error -21 (recursion limit exceeded)
+     a
+ No match
+ 
++"(?<=((?2))((?1)))"
++Failed: lookbehind assertion is not fixed length at offset 17
++
+ /-- End of testinput2 --/
+-- 
+2.1.0
+
diff --git a/pcre.spec b/pcre.spec
index f540d6d..80862ae 100644
--- a/pcre.spec
+++ b/pcre.spec
@@ -43,6 +43,13 @@ Patch10: pcre-8.33-Fix-memory-bug-for-S-V-H-compile.patch
 # Fix compilation of a parenthesized comment, bug #1210410,
 # in upstream after 8.36
 Patch11: pcre-8.36-Fix-comment-between-subroutine-call-and-quantifier-b.patch
+# Fix checking if a group can match empty string. Needed for
+# Fix-stack-overflow-instead-of-diagnostic-for-mutual patch. Bug #1210417,
+# in upstream after 8.33
+Patch12: pcre-8.33-Fix-a-bug-concerned-with-scanning-for-empty-string-m.patch
+# Fix compilation of mutual recursion inside a lookbehind assertion,
+# bug #1210417, in upstream after 8.36
+Patch13: pcre-8.33-Fix-stack-overflow-instead-of-diagnostic-for-mutual-.patch
 BuildRequires: readline-devel
 # New libtool to get rid of rpath
 BuildRequires: autoconf, automake, libtool
@@ -94,6 +101,8 @@ Utilities demonstrating PCRE capabilities like pcregrep or pcretest.
 %patch9 -p1 -b .zero_repeat_assertion
 %patch10 -p1 -b .size_special_class
 %patch11 -p1 -b .parenthesized_comment
+%patch12 -p1 -b .empty_string_check
+%patch13 -p1 -b .mutual_recursion_in_assertion
 # Because of rpath patch
 libtoolize --copy --force && autoreconf -vif
 # One contributor's name is non-UTF-8
@@ -161,6 +170,8 @@ make check
 - Fix computing size for pattern with a negated special calss in on-UCP mode
   (bug #1210383)
 - Fix compilation of a parenthesized comment (bug #1210410)
+- Fix compilation of mutual recursion inside a lookbehind assertion
+  (bug #1210417)
 
 * Tue Dec 02 2014 Petr Pisar <ppisar at redhat.com> - 8.33-8
 - Fix CVE-2014-8964 (unused memory usage on zero-repeat assertion condition)
-- 
cgit v0.10.2


	http://pkgs.fedoraproject.org/cgit/pcre.git/commit/?h=f20&id=1e0eac5695c97e061a7c1a97fb0b8e8f87c06348


More information about the scm-commits mailing list