[pcre] Add pcre-8.35-Fix-empty-matching-possessive-zero-repeat-groups-bug.patch

Petr Pisar ppisar at fedoraproject.org
Mon Jul 14 12:20:32 UTC 2014


commit 1860c03fdead52378512df6a9175f48a9d45f2f4
Author: Petr Písař <ppisar at redhat.com>
Date:   Mon Jul 14 14:19:45 2014 +0200

    Add pcre-8.35-Fix-empty-matching-possessive-zero-repeat-groups-bug.patch
    
    Needed for
    
    commit 5d1fed8df94a0db02c12ce61c0c2ddc8c97e64d3
    Author: Petr Písař <ppisar at redhat.com>
    Date:   Mon Jul 14 13:15:27 2014 +0200
    
        Fix empty-matching possessive zero-repeat groups in interpreted mode

 ...atching-possessive-zero-repeat-groups-bug.patch |  192 ++++++++++++++++++++
 1 files changed, 192 insertions(+), 0 deletions(-)
---
diff --git a/pcre-8.35-Fix-empty-matching-possessive-zero-repeat-groups-bug.patch b/pcre-8.35-Fix-empty-matching-possessive-zero-repeat-groups-bug.patch
new file mode 100644
index 0000000..2d82134
--- /dev/null
+++ b/pcre-8.35-Fix-empty-matching-possessive-zero-repeat-groups-bug.patch
@@ -0,0 +1,192 @@
+From 31152356367ef3cf3440c0431d2898f198e4dd18 Mon Sep 17 00:00:00 2001
+From: ph10 <ph10 at 2f5784b3-3f2a-0410-8824-cb99058d5e15>
+Date: Tue, 27 May 2014 13:18:31 +0000
+Subject: [PATCH] Fix empty-matching possessive zero-repeat groups bug.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1478 2f5784b3-3f2a-0410-8824-cb99058d5e15
+Signed-off-by: Petr Písař <ppisar at redhat.com>
+
+Petr Pisar: Ported to 8.35.
+
+Signed-off-by: Petr Písař <ppisar at redhat.com>
+---
+ pcre_exec.c          | 43 +++++++++++++++++++++++++++----------------
+ testdata/testinput1  |  9 +++++++++
+ testdata/testinput8  |  6 ++++++
+ testdata/testoutput1 | 12 ++++++++++++
+ testdata/testoutput8 |  8 ++++++++
+ 5 files changed, 62 insertions(+), 16 deletions(-)
+
+diff --git a/pcre_exec.c b/pcre_exec.c
+index 5dec992..5a8dbad 100644
+--- a/pcre_exec.c
++++ b/pcre_exec.c
+@@ -1167,11 +1167,16 @@ for (;;)
+         if (rrc == MATCH_KETRPOS)
+           {
+           offset_top = md->end_offset_top;
+-          eptr = md->end_match_ptr;
+           ecode = md->start_code + code_offset;
+           save_capture_last = md->capture_last;
+           matched_once = TRUE;
+           mstart = md->start_match_ptr;    /* In case \K changed it */
++          if (eptr == md->end_match_ptr)   /* Matched an empty string */
++            {
++            do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
++            break;
++            }  
++          eptr = md->end_match_ptr;
+           continue;
+           }
+ 
+@@ -1241,10 +1246,15 @@ for (;;)
+       if (rrc == MATCH_KETRPOS)
+         {
+         offset_top = md->end_offset_top;
+-        eptr = md->end_match_ptr;
+         ecode = md->start_code + code_offset;
+         matched_once = TRUE;
+         mstart = md->start_match_ptr;   /* In case \K reset it */
++        if (eptr == md->end_match_ptr)  /* Matched an empty string */
++          {
++          do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
++          break;
++          }  
++        eptr = md->end_match_ptr;
+         continue;
+         }
+ 
+@@ -1894,7 +1904,7 @@ for (;;)
+     case OP_KETRMAX:
+     case OP_KETRPOS:
+     prev = ecode - GET(ecode, 1);
+-
++    
+     /* If this was a group that remembered the subject start, in order to break
+     infinite repeats of empty string matches, retrieve the subject start from
+     the chain. Otherwise, set it NULL. */
+@@ -1919,7 +1929,7 @@ for (;;)
+       md->start_match_ptr = mstart;
+       RRETURN(MATCH_MATCH);         /* Sets md->mark */
+       }
+-
++      
+     /* For capturing groups we have to check the group number back at the start
+     and if necessary complete handling an extraction by setting the offsets and
+     bumping the high water mark. Whole-pattern recursion is coded as a recurse
+@@ -1979,6 +1989,19 @@ for (;;)
+         }
+       }
+ 
++    /* OP_KETRPOS is a possessive repeating ket. Remember the current position
++    and return MATCH_KETRPOS. This makes it possible to do the repeats one at a
++    time from the outer level, thus saving stack. This check must precede the
++    empty-string test: for OP_KETRPOS that test is done at the outer level. */
++
++    if (*ecode == OP_KETRPOS)
++      {
++      md->start_match_ptr = mstart;    /* In case \K reset it */
++      md->end_match_ptr = eptr;
++      md->end_offset_top = offset_top;
++      RRETURN(MATCH_KETRPOS);
++      }
++
+     /* For an ordinary non-repeating ket, just continue at this level. This
+     also happens for a repeating ket if no characters were matched in the
+     group. This is the forcible breaking of infinite loops as implemented in
+@@ -2001,18 +2024,6 @@ for (;;)
+       break;
+       }
+ 
+-    /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
+-    and return the MATCH_KETRPOS. This makes it possible to do the repeats one
+-    at a time from the outer level, thus saving stack. */
+-
+-    if (*ecode == OP_KETRPOS)
+-      {
+-      md->start_match_ptr = mstart;    /* In case \K reset it */
+-      md->end_match_ptr = eptr;
+-      md->end_offset_top = offset_top;
+-      RRETURN(MATCH_KETRPOS);
+-      }
+-
+     /* The normal repeating kets try the rest of the pattern or restart from
+     the preceding bracket, in the appropriate order. In the second case, we can
+     use tail recursion to avoid using another stack frame, unless we have an
+diff --git a/testdata/testinput1 b/testdata/testinput1
+index f933692..ffb9455 100644
+--- a/testdata/testinput1
++++ b/testdata/testinput1
+@@ -5675,4 +5675,13 @@ AbcdCBefgBhiBqz
+ /[\Q]a\E]+/
+     aa]]
+ 
++'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++'
++    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
++
++'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++'
++    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
++
++'\A(?:[^\"]++|\"(?:[^\"]++|\"\")++\")++'
++    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
++
+ /-- End of testinput1 --/
+diff --git a/testdata/testinput8 b/testdata/testinput8
+index bb2747b..06334cd 100644
+--- a/testdata/testinput8
++++ b/testdata/testinput8
+@@ -4831,4 +4831,10 @@
+ /[ab]{2,}?/
+     aaaa    
+ 
++'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++'
++    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
++
++'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++'
++    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
++
+ /-- End of testinput8 --/
+diff --git a/testdata/testoutput1 b/testdata/testoutput1
+index 3d9a328..b2ae430 100644
+--- a/testdata/testoutput1
++++ b/testdata/testoutput1
+@@ -9325,4 +9325,16 @@ No match
+     aa]]
+  0: aa]]
+ 
++'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++'
++    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
++ 0: NON QUOTED "QUOT""ED" AFTER 
++
++'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++'
++    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
++ 0: NON QUOTED "QUOT""ED" AFTER 
++
++'\A(?:[^\"]++|\"(?:[^\"]++|\"\")++\")++'
++    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
++ 0: NON QUOTED "QUOT""ED" AFTER 
++
+ /-- End of testinput1 --/
+diff --git a/testdata/testoutput8 b/testdata/testoutput8
+index 3861ea4..95c4e4d 100644
+--- a/testdata/testoutput8
++++ b/testdata/testoutput8
+@@ -7777,4 +7777,12 @@ Matched, but offsets vector is too small to show all matches
+  1: aaa
+  2: aa
+ 
++'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++'
++    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
++ 0: NON QUOTED "QUOT""ED" AFTER 
++
++'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++'
++    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
++ 0: NON QUOTED "QUOT""ED" AFTER 
++
+ /-- End of testinput8 --/
+-- 
+1.9.3
+


More information about the scm-commits mailing list