ppisar pushed to pcre (f21). "Fix pcregrep loop when \K is used in a lookbehind assertion"

notifications at fedoraproject.org notifications at fedoraproject.org
Fri Apr 10 08:06:35 UTC 2015


>From 1684bcb83687ea4167cb29b890c26f0ca975190e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= <ppisar at redhat.com>
Date: Thu, 9 Apr 2015 18:51:34 +0200
Subject: Fix pcregrep loop when \K is used in a lookbehind assertion


diff --git a/pcre-8.36-Fix-pcregrep-loop-when-K-is-used-in-a-lookbehind-ass.patch b/pcre-8.36-Fix-pcregrep-loop-when-K-is-used-in-a-lookbehind-ass.patch
new file mode 100644
index 0000000..fd7ef9f
--- /dev/null
+++ b/pcre-8.36-Fix-pcregrep-loop-when-K-is-used-in-a-lookbehind-ass.patch
@@ -0,0 +1,211 @@
+From 8f907e8731b5eb4aca840a1979e76eed1b5c8175 Mon Sep 17 00:00:00 2001
+From: ph10 <ph10 at 2f5784b3-3f2a-0410-8824-cb99058d5e15>
+Date: Tue, 7 Apr 2015 15:52:11 +0000
+Subject: [PATCH] Fix pcregrep loop when \K is used in a lookbehind assertion.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Upstream commit ported to 8.36:
+
+commit b4332d7dd831b3547b3f541495de4a79554e538e
+Author: ph10 <ph10 at 2f5784b3-3f2a-0410-8824-cb99058d5e15>
+Date:   Tue Apr 7 15:52:11 2015 +0000
+
+    Fix pcregrep loop when \K is used in a lookbehind assertion.
+
+    git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1543 2f5784b3-3f2a-0410-8824-cb99058d5e15
+
+Signed-off-by: Petr Písař <ppisar at redhat.com>
+---
+ RunGrepTest         |   5 +++
+ pcregrep.c          | 109 ++++++++++++++++++++++++++++++++++------------------
+ testdata/grepoutput |   8 ++++
+ 3 files changed, 85 insertions(+), 37 deletions(-)
+
+diff --git a/RunGrepTest b/RunGrepTest
+index f1b0348..766278b 100755
+--- a/RunGrepTest
++++ b/RunGrepTest
+@@ -506,6 +506,11 @@ echo "---------------------------- Test 106 -----------------------------" >>tes
+ (cd $srcdir; echo "a" | $valgrind $pcregrep -M "|a" ) >>testtrygrep 2>&1
+ echo "RC=$?" >>testtrygrep
+ 
++echo "---------------------------- Test 107 -----------------------------" >>testtrygrep
++echo "a" >testtemp1grep
++echo "aaaaa" >>testtemp1grep
++(cd $srcdir; $valgrind $pcregrep  --line-offsets '(?<=\Ka)' testtemp1grep) >>testtrygrep 2>&1
++echo "RC=$?" >>testtrygrep
+ 
+ # Now compare the results.
+ 
+diff --git a/pcregrep.c b/pcregrep.c
+index 4f7fa38..b1af129 100644
+--- a/pcregrep.c
++++ b/pcregrep.c
+@@ -1582,11 +1582,14 @@ while (ptr < endptr)
+   int endlinelength;
+   int mrc = 0;
+   int startoffset = 0;
++  int prevoffsets[2]; 
+   unsigned int options = 0;
+   BOOL match;
+   char *matchptr = ptr;
+   char *t = ptr;
+   size_t length, linelength;
++  
++  prevoffsets[0] = prevoffsets[1] = -1; 
+ 
+   /* At this point, ptr is at the start of a line. We need to find the length
+   of the subject string to pass to pcre_exec(). In multiline mode, it is the
+@@ -1729,55 +1732,86 @@ while (ptr < endptr)
+       {
+       if (!invert)
+         {
+-        if (printname != NULL) fprintf(stdout, "%s:", printname);
+-        if (number) fprintf(stdout, "%d:", linenumber);
+-
+-        /* Handle --line-offsets */
+-
+-        if (line_offsets)
+-          fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
+-            offsets[1] - offsets[0]);
+-
+-        /* Handle --file-offsets */
+-
+-        else if (file_offsets)
+-          fprintf(stdout, "%d,%d\n",
+-            (int)(filepos + matchptr + offsets[0] - ptr),
+-            offsets[1] - offsets[0]);
+-
+-        /* Handle --only-matching, which may occur many times */
+-
+-        else
++        int oldstartoffset = startoffset;
++        
++        /* It is possible, when a lookbehind assertion contains \K, for the 
++        same string to be found again. The code below advances startoffset, but 
++        until it is past the "bumpalong" offset that gave the match, the same
++        substring will be returned. The PCRE1 library does not return the
++        bumpalong offset, so all we can do is ignore repeated strings. (PCRE2
++        does this better.) */
++         
++        if (prevoffsets[0] != offsets[0] || prevoffsets[1] != offsets[1])
+           {
+-          BOOL printed = FALSE;
+-          omstr *om;
+-
+-          for (om = only_matching; om != NULL; om = om->next)
++          prevoffsets[0] = offsets[0];
++          prevoffsets[1] = offsets[1]; 
++            
++          if (printname != NULL) fprintf(stdout, "%s:", printname);
++          if (number) fprintf(stdout, "%d:", linenumber);
++          
++          /* Handle --line-offsets */
++          
++          if (line_offsets)
++            fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
++              offsets[1] - offsets[0]);
++          
++          /* Handle --file-offsets */
++          
++          else if (file_offsets)
++            fprintf(stdout, "%d,%d\n",
++              (int)(filepos + matchptr + offsets[0] - ptr),
++              offsets[1] - offsets[0]);
++          
++          /* Handle --only-matching, which may occur many times */
++          
++          else
+             {
+-            int n = om->groupnum;
+-            if (n < mrc)
++            BOOL printed = FALSE;
++            omstr *om;
++          
++            for (om = only_matching; om != NULL; om = om->next)
+               {
+-              int plen = offsets[2*n + 1] - offsets[2*n];
+-              if (plen > 0)
++              int n = om->groupnum;
++              if (n < mrc)
+                 {
+-                if (printed) fprintf(stdout, "%s", om_separator);
+-                if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
+-                FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
+-                if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
+-                printed = TRUE;
++                int plen = offsets[2*n + 1] - offsets[2*n];
++                if (plen > 0)
++                  {
++                  if (printed) fprintf(stdout, "%s", om_separator);
++                  if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
++                  FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
++                  if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
++                  printed = TRUE;
++                  }
+                 }
+               }
++          
++            if (printed || printname != NULL || number) fprintf(stdout, "\n");
+             }
+-
+-          if (printed || printname != NULL || number) fprintf(stdout, "\n");
+-          }
+-
+-        /* Prepare to repeat to find the next match */
++          }   
++
++        /* Prepare to repeat to find the next match. If the pattern contained
++        a lookbehind that included \K, it is possible that the end of the match
++        might be at or before the actual starting offset we have just used. We
++        need to start one character further on. Unfortunately, for unanchored
++        patterns, the actual start offset can be greater than the one that was
++        set as a result of "bumpalong". PCRE1 does not return the actual start
++        offset, so we have to check against the original start offset. This may
++        lead to duplicates - we need the fudge above to avoid printing them.
++        (PCRE2 does this better.) */
+ 
+         match = FALSE;
+         if (line_buffered) fflush(stdout);
+         rc = 0;                      /* Had some success */
+         startoffset = offsets[1];    /* Restart after the match */
++        if (startoffset <= oldstartoffset)
++          {
++          if ((size_t)startoffset >= length) 
++            goto END_ONE_MATCH;              /* We were at the end */
++          startoffset = oldstartoffset + 1;
++          if (utf8)
++            while ((matchptr[startoffset] & 0xc0) == 0x80) startoffset++;    
++          }   
+         goto ONLY_MATCHING_RESTART;
+         }
+       }
+@@ -1974,6 +2008,7 @@ while (ptr < endptr)
+   /* Advance to after the newline and increment the line number. The file
+   offset to the current line is maintained in filepos. */
+ 
++  END_ONE_MATCH:
+   ptr += linelength + endlinelength;
+   filepos += (int)(linelength + endlinelength);
+   linenumber++;
+diff --git a/testdata/grepoutput b/testdata/grepoutput
+index 9bf9d9d..4d61752 100644
+--- a/testdata/grepoutput
++++ b/testdata/grepoutput
+@@ -743,3 +743,11 @@ RC=0
+ ---------------------------- Test 106 -----------------------------
+ a
+ RC=0
++---------------------------- Test 107 -----------------------------
++1:0,1
++2:0,1
++2:1,1
++2:2,1
++2:3,1
++2:4,1
++RC=0
+-- 
+2.1.0
+
diff --git a/pcre.spec b/pcre.spec
index 0b7d06e..37c22da 100644
--- a/pcre.spec
+++ b/pcre.spec
@@ -56,6 +56,9 @@ Patch14: pcre-8.36-Fix-comment-between-subroutine-call-and-quantifier-b.patch
 # Fix compliation of mutual recursion inside a lookbehind assertion,
 # bug #1210417, in upstream after 8.36
 Patch15: pcre-8.36-Fix-stack-overflow-instead-of-diagnostic-for-mutual-.patch
+# Fix pcregrep loop when \K is used in a lookbehind assertion, bug #1210423,
+# in upstream after 8.36
+Patch16: pcre-8.36-Fix-pcregrep-loop-when-K-is-used-in-a-lookbehind-ass.patch
 BuildRequires: readline-devel
 # New libtool to get rid of rpath
 BuildRequires: autoconf, automake, libtool
@@ -111,6 +114,7 @@ Utilities demonstrating PCRE capabilities like pcregrep or pcretest.
 %patch13 -p1 -b .mutual_recursion
 %patch14 -p1 -b .parenthesized_comment
 %patch15 -p1 -b .mutual_recursion_in_assertion
+%patch16 -p1 -b .pcregrep_k_loop
 # Because of rpath patch
 libtoolize --copy --force && autoreconf -vif
 # One contributor's name is non-UTF-8
@@ -189,6 +193,7 @@ make %{?_smp_mflags} check
 - Fix compilation of a parenthesized comment (bug #1210410)
 - Fix compliation of mutual recursion inside a lookbehind assertion
   (bug #1210417)
+- Fix pcregrep loop when \K is used in a lookbehind assertion (bug #1210423)
 
 * Thu Nov 20 2014 Petr Pisar <ppisar at redhat.com> - 8.35-8
 - Fix CVE-2014-8964 (unused memory usage on zero-repeat assertion condition)
-- 
cgit v0.10.2


	http://pkgs.fedoraproject.org/cgit/pcre.git/commit/?h=f21&id=1684bcb83687ea4167cb29b890c26f0ca975190e


More information about the scm-commits mailing list