ppisar pushed to pcre (f20). "Fix pcregrep loop when \K is used in a lookbehind assertion"
notifications at fedoraproject.org
notifications at fedoraproject.org
Fri Apr 10 13:47:21 UTC 2015
>From 9cf7f81e4b1f5e46d00d8b79d8f63e29dfbbf0ef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= <ppisar at redhat.com>
Date: Thu, 9 Apr 2015 18:51:34 +0200
Subject: Fix pcregrep loop when \K is used in a lookbehind assertion
diff --git a/pcre-8.33-Fix-pcregrep-loop-when-K-is-used-in-a-lookbehind-ass.patch b/pcre-8.33-Fix-pcregrep-loop-when-K-is-used-in-a-lookbehind-ass.patch
new file mode 100644
index 0000000..c1affd8
--- /dev/null
+++ b/pcre-8.33-Fix-pcregrep-loop-when-K-is-used-in-a-lookbehind-ass.patch
@@ -0,0 +1,211 @@
+From caee72be186225496c82b51f22391a09f038dd36 Mon Sep 17 00:00:00 2001
+From: ph10 <ph10 at 2f5784b3-3f2a-0410-8824-cb99058d5e15>
+Date: Tue, 7 Apr 2015 15:52:11 +0000
+Subject: [PATCH] Fix pcregrep loop when \K is used in a lookbehind assertion.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Upstream commit ported to 8.33:
+
+commit b4332d7dd831b3547b3f541495de4a79554e538e
+Author: ph10 <ph10 at 2f5784b3-3f2a-0410-8824-cb99058d5e15>
+Date: Tue Apr 7 15:52:11 2015 +0000
+
+ Fix pcregrep loop when \K is used in a lookbehind assertion.
+
+ git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1543 2f5784b3-3f2a-0410-8824-cb99058d5e15
+
+Signed-off-by: Petr Písař <ppisar at redhat.com>
+---
+ RunGrepTest | 5 +++
+ pcregrep.c | 109 ++++++++++++++++++++++++++++++++++------------------
+ testdata/grepoutput | 8 ++++
+ 3 files changed, 85 insertions(+), 37 deletions(-)
+
+diff --git a/RunGrepTest b/RunGrepTest
+index daaf8af..7a25a2f 100755
+--- a/RunGrepTest
++++ b/RunGrepTest
+@@ -502,6 +502,11 @@ echo "---------------------------- Test 105 -----------------------------" >>tes
+ (cd $srcdir; $valgrind $pcregrep --colour=always "ipsum|" ./testdata/grepinput3) >>testtry 2>&1
+ echo "RC=$?" >>testtry
+
++echo "---------------------------- Test 106 -----------------------------" >>testtry
++echo "a" >testtemp1grep
++echo "aaaaa" >>testtemp1grep
++(cd $srcdir; $valgrind $pcregrep --line-offsets '(?<=\Ka)' testtemp1grep) >>testtry 2>&1
++echo "RC=$?" >>testtry
+
+ # Now compare the results.
+
+diff --git a/pcregrep.c b/pcregrep.c
+index 4adc20d..0125ddc 100644
+--- a/pcregrep.c
++++ b/pcregrep.c
+@@ -1541,11 +1541,14 @@ while (ptr < endptr)
+ int endlinelength;
+ int mrc = 0;
+ int startoffset = 0;
++ int prevoffsets[2];
+ unsigned int options = 0;
+ BOOL match;
+ char *matchptr = ptr;
+ char *t = ptr;
+ size_t length, linelength;
++
++ prevoffsets[0] = prevoffsets[1] = -1;
+
+ /* At this point, ptr is at the start of a line. We need to find the length
+ of the subject string to pass to pcre_exec(). In multiline mode, it is the
+@@ -1688,55 +1691,86 @@ while (ptr < endptr)
+ {
+ if (!invert)
+ {
+- if (printname != NULL) fprintf(stdout, "%s:", printname);
+- if (number) fprintf(stdout, "%d:", linenumber);
+-
+- /* Handle --line-offsets */
+-
+- if (line_offsets)
+- fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
+- offsets[1] - offsets[0]);
+-
+- /* Handle --file-offsets */
+-
+- else if (file_offsets)
+- fprintf(stdout, "%d,%d\n",
+- (int)(filepos + matchptr + offsets[0] - ptr),
+- offsets[1] - offsets[0]);
+-
+- /* Handle --only-matching, which may occur many times */
+-
+- else
++ int oldstartoffset = startoffset;
++
++ /* It is possible, when a lookbehind assertion contains \K, for the
++ same string to be found again. The code below advances startoffset, but
++ until it is past the "bumpalong" offset that gave the match, the same
++ substring will be returned. The PCRE1 library does not return the
++ bumpalong offset, so all we can do is ignore repeated strings. (PCRE2
++ does this better.) */
++
++ if (prevoffsets[0] != offsets[0] || prevoffsets[1] != offsets[1])
+ {
+- BOOL printed = FALSE;
+- omstr *om;
+-
+- for (om = only_matching; om != NULL; om = om->next)
++ prevoffsets[0] = offsets[0];
++ prevoffsets[1] = offsets[1];
++
++ if (printname != NULL) fprintf(stdout, "%s:", printname);
++ if (number) fprintf(stdout, "%d:", linenumber);
++
++ /* Handle --line-offsets */
++
++ if (line_offsets)
++ fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
++ offsets[1] - offsets[0]);
++
++ /* Handle --file-offsets */
++
++ else if (file_offsets)
++ fprintf(stdout, "%d,%d\n",
++ (int)(filepos + matchptr + offsets[0] - ptr),
++ offsets[1] - offsets[0]);
++
++ /* Handle --only-matching, which may occur many times */
++
++ else
+ {
+- int n = om->groupnum;
+- if (n < mrc)
++ BOOL printed = FALSE;
++ omstr *om;
++
++ for (om = only_matching; om != NULL; om = om->next)
+ {
+- int plen = offsets[2*n + 1] - offsets[2*n];
+- if (plen > 0)
++ int n = om->groupnum;
++ if (n < mrc)
+ {
+- if (printed) fprintf(stdout, "%s", om_separator);
+- if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
+- FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
+- if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
+- printed = TRUE;
++ int plen = offsets[2*n + 1] - offsets[2*n];
++ if (plen > 0)
++ {
++ if (printed) fprintf(stdout, "%s", om_separator);
++ if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
++ FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
++ if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
++ printed = TRUE;
++ }
+ }
+ }
++
++ if (printed || printname != NULL || number) fprintf(stdout, "\n");
+ }
+-
+- if (printed || printname != NULL || number) fprintf(stdout, "\n");
+- }
+-
+- /* Prepare to repeat to find the next match */
++ }
++
+ /* Prepare to repeat to find the next match. If the pattern contained
+ a lookbehind that included \K, it is possible that the end of the match
+ might be at or before the actual starting offset we have just used. We
+ need to start one character further on. Unfortunately, for unanchored
+ patterns, the actual start offset can be greater than the one that was
+ set as a result of "bumpalong". PCRE1 does not return the actual start
+ offset, so we have to check against the original start offset. This may
+ lead to duplicates - we need the fudge above to avoid printing them.
+ (PCRE2 does this better.) */
+
+ match = FALSE;
+ if (line_buffered) fflush(stdout);
+ rc = 0; /* Had some success */
+ startoffset = offsets[1]; /* Restart after the match */
++ if (startoffset <= oldstartoffset)
++ {
++ if ((size_t)startoffset >= length)
++ goto END_ONE_MATCH; /* We were at the end */
++ startoffset = oldstartoffset + 1;
++ if (utf8)
++ while ((matchptr[startoffset] & 0xc0) == 0x80) startoffset++;
++ }
+ goto ONLY_MATCHING_RESTART;
+ }
+ }
+@@ -1933,6 +1967,7 @@ while (ptr < endptr)
+ /* Advance to after the newline and increment the line number. The file
+ offset to the current line is maintained in filepos. */
+
++ END_ONE_MATCH:
+ ptr += linelength + endlinelength;
+ filepos += (int)(linelength + endlinelength);
+ linenumber++;
+diff --git a/testdata/grepoutput b/testdata/grepoutput
+index cf04091..3e9dc68 100644
+--- a/testdata/grepoutput
++++ b/testdata/grepoutput
+@@ -740,3 +740,11 @@ RC=0
+ [1;31m[00m
+ [1;31m[00mtriple: t7_txt s1_tag s_txt p_tag p_txt o_tag o_txt
+ RC=0
++---------------------------- Test 106 -----------------------------
++1:0,1
++2:0,1
++2:1,1
++2:2,1
++2:3,1
++2:4,1
++RC=0
+--
+2.1.0
+
diff --git a/pcre.spec b/pcre.spec
index 80862ae..be628ca 100644
--- a/pcre.spec
+++ b/pcre.spec
@@ -50,6 +50,9 @@ Patch12: pcre-8.33-Fix-a-bug-concerned-with-scanning-for-empty-string-m.patch
# Fix compliation of mutual recursion inside a lookbehind assertion,
# bug #1210417, in upstream after 8.36
Patch13: pcre-8.33-Fix-stack-overflow-instead-of-diagnostic-for-mutual-.patch
+# Fix pcregrep loop when \K is used in a lookbehind assertion, bug #1210423,
+# in upstream after 8.36
+Patch14: pcre-8.33-Fix-pcregrep-loop-when-K-is-used-in-a-lookbehind-ass.patch
BuildRequires: readline-devel
# New libtool to get rid of rpath
BuildRequires: autoconf, automake, libtool
@@ -103,6 +106,7 @@ Utilities demonstrating PCRE capabilities like pcregrep or pcretest.
%patch11 -p1 -b .parenthesized_comment
%patch12 -p1 -b .empty_string_check
%patch13 -p1 -b .mutual_recursion_in_assertion
+%patch14 -p1 -b .pcregrep_k_loop
# Because of rpath patch
libtoolize --copy --force && autoreconf -vif
# One contributor's name is non-UTF-8
@@ -172,6 +176,7 @@ make check
- Fix compilation of a parenthesized comment (bug #1210410)
- Fix compliation of mutual recursion inside a lookbehind assertion
(bug #1210417)
+- Fix pcregrep loop when \K is used in a lookbehind assertion (bug #1210423)
* Tue Dec 02 2014 Petr Pisar <ppisar at redhat.com> - 8.33-8
- Fix CVE-2014-8964 (unused memory usage on zero-repeat assertion condition)
--
cgit v0.10.2
http://pkgs.fedoraproject.org/cgit/pcre.git/commit/?h=f20&id=9cf7f81e4b1f5e46d00d8b79d8f63e29dfbbf0ef
More information about the scm-commits
mailing list