ppisar pushed to pcre (f22). "Fix backtracking for \C\X* in UTF-8 mode"

notifications at fedoraproject.org notifications at fedoraproject.org
Fri Apr 10 06:56:59 UTC 2015


From f9510ab536fe65b890ad5efeb83f2f0965754388 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= <ppisar at redhat.com>
Date: Fri, 10 Apr 2015 08:11:38 +0200
Subject: Fix backtracking for \C\X* in UTF-8 mode


diff --git a/pcre-8.36-Fix-backtracking-bug-for-C-X-in-UTF-mode.patch b/pcre-8.36-Fix-backtracking-bug-for-C-X-in-UTF-mode.patch
new file mode 100644
index 0000000..1141d75
--- /dev/null
+++ b/pcre-8.36-Fix-backtracking-bug-for-C-X-in-UTF-mode.patch
@@ -0,0 +1,146 @@
+From 9d8c223a96fde36fc21307abad702ed747135809 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= <ppisar at redhat.com>
+Date: Fri, 10 Apr 2015 08:17:18 +0200
+Subject: [PATCH] Fix backtracking bug for \C\X* in UTF mode.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Upstream commits ported to 8.36:
+
+commit 4a81b0ca19da65ea9a50c208017a74e55c3fd027
+Author: ph10 <ph10 at 2f5784b3-3f2a-0410-8824-cb99058d5e15>
+Date:   Wed Apr 8 16:34:24 2015 +0000
+
+    Fix backtracking bug for \C\X* in UTF mode.
+
+    git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1545 2f5784b3-3f2a-0410-8824-cb99058d5e15
+
+commit 6719c2cdeb7670d4bf10f15a8511ca15af7ea595
+Author: ph10 <ph10 at 2f5784b3-3f2a-0410-8824-cb99058d5e15>
+Date:   Wed Apr 8 16:56:28 2015 +0000
+
+    Fix other cases of backtracking crashes after \C in UTF mode.
+
+    git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1546 2f5784b3-3f2a-0410-8824-
+cb99058d5e15
+
+Signed-off-by: Petr Písař <ppisar at redhat.com>
+---
+ pcre_exec.c          | 19 ++++++++++++-------
+ testdata/testinput4  |  6 ++++++
+ testdata/testoutput4 |  8 ++++++++
+ 3 files changed, 26 insertions(+), 7 deletions(-)
+
+diff --git a/pcre_exec.c b/pcre_exec.c
+index bb5620d..6c9f4d7 100644
+--- a/pcre_exec.c
++++ b/pcre_exec.c
+@@ -3490,7 +3490,7 @@ for (;;)
+           if (possessive) continue;    /* No backtracking */
+           for(;;)
+             {
+-            if (eptr == pp) goto TAIL_RECURSE;
++            if (eptr <= pp) goto TAIL_RECURSE;
+             RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
+             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ #ifdef SUPPORT_UCP
+@@ -3911,7 +3911,7 @@ for (;;)
+           if (possessive) continue;    /* No backtracking */
+           for(;;)
+             {
+-            if (eptr == pp) goto TAIL_RECURSE;
++            if (eptr <= pp) goto TAIL_RECURSE;
+             RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
+             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+             eptr--;
+@@ -4046,7 +4046,7 @@ for (;;)
+           if (possessive) continue;    /* No backtracking */
+           for(;;)
+             {
+-            if (eptr == pp) goto TAIL_RECURSE;
++            if (eptr <= pp) goto TAIL_RECURSE;
+             RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
+             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+             eptr--;
+@@ -5617,7 +5617,7 @@ for (;;)
+         if (possessive) continue;    /* No backtracking */
+         for(;;)
+           {
+-          if (eptr == pp) goto TAIL_RECURSE;
++          if (eptr <= pp) goto TAIL_RECURSE;
+           RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
+           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+           eptr--;
+@@ -5659,12 +5659,17 @@ for (;;)
+ 
+         if (possessive) continue;    /* No backtracking */
+ 
++        /* We use <= pp rather than == pp to detect the start of the run while
++        backtracking because the use of \C in UTF mode can cause BACKCHAR to
++        move back past pp. This is just palliative; the use of \C in UTF mode
++        is fraught with danger. */
++
+         for(;;)
+           {
+           int lgb, rgb;
+           PCRE_PUCHAR fptr;
+ 
+-          if (eptr == pp) goto TAIL_RECURSE;   /* At start of char run */
++          if (eptr <= pp) goto TAIL_RECURSE;   /* At start of char run */
+           RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
+           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ 
+@@ -5682,7 +5687,7 @@ for (;;)
+ 
+           for (;;)
+             {
+-            if (eptr == pp) goto TAIL_RECURSE;   /* At start of char run */
++            if (eptr <= pp) goto TAIL_RECURSE;   /* At start of char run */
+             fptr = eptr - 1;
+             if (!utf) c = *fptr; else
+               {
+@@ -5932,7 +5937,7 @@ for (;;)
+         if (possessive) continue;    /* No backtracking */
+         for(;;)
+           {
+-          if (eptr == pp) goto TAIL_RECURSE;
++          if (eptr <= pp) goto TAIL_RECURSE;
+           RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
+           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+           eptr--;
+diff --git a/testdata/testinput4 b/testdata/testinput4
+index 04fca1a..f139c62 100644
+--- a/testdata/testinput4
++++ b/testdata/testinput4
+@@ -724,4 +724,10 @@
+ 
+ "[\S\V\H]"8
+ 
++/\C\X*QT/8
++    Ӆ\x0aT
++
++/\C(\W?ſ)'?{{/8
++    \\C(\\W?ſ)'?{{
++
+ /-- End of testinput4 --/
+diff --git a/testdata/testoutput4 b/testdata/testoutput4
+index 340a949..ffd02ea 100644
+--- a/testdata/testoutput4
++++ b/testdata/testoutput4
+@@ -1273,4 +1273,12 @@ No match
+ 
+ "[\S\V\H]"8
+ 
++/\C\X*QT/8
++    Ӆ\x0aT
++No match
++
++/\C(\W?ſ)'?{{/8
++    \\C(\\W?ſ)'?{{
++No match
++
+ /-- End of testinput4 --/
+-- 
+2.1.0
+
diff --git a/pcre.spec b/pcre.spec
index 92841f6..eefc168 100644
--- a/pcre.spec
+++ b/pcre.spec
@@ -40,6 +40,9 @@ Patch9: pcre-8.36-Fix-pcregrep-loop-when-K-is-used-in-a-lookbehind-ass.patch
 # Fix pcretest loop when \K is used in a lookbehind assertion, bug #1210423,
 # in upstream after 8.36
 Patch10: pcre-8.36-Fix-pcretest-loop-for-K-in-lookbehind-assertion.patch
+# Fix backtracking for \C\X* in UTF-8 mode, bug #1210576,
+# in upstream after 8.36
+Patch11: pcre-8.36-Fix-backtracking-bug-for-C-X-in-UTF-mode.patch
 BuildRequires: readline-devel
 # New libtool to get rid of rpath
 BuildRequires: autoconf, automake, libtool
@@ -90,6 +93,7 @@ Utilities demonstrating PCRE capabilities like pcregrep or pcretest.
 %patch8 -p1 -b .mutual_recursion_in_assertion
 %patch9 -p1 -b .pcregrep_k_loop
 %patch10 -p1 -b .pcretest_k_loop
+%patch11 -p1 -b .backtracking_cx8
 # Because of rpath patch
 libtoolize --copy --force && autoreconf -vif
 # One contributor's name is non-UTF-8
@@ -170,6 +174,7 @@ make %{?_smp_mflags} check VERBOSE=yes
   (bug #1210417)
 - Fix pcregrep loop when \K is used in a lookbehind assertion (bug #1210423)
 - Fix pcretest loop when \K is used in a lookbehind assertion (bug #1210423)
+- Fix backtracking for \C\X* in UTF-8 mode (bug #1210576)
 
 * Thu Mar 26 2015 Petr Pisar <ppisar at redhat.com> - 8.36-4
 - Fix computing size of JIT read-only data (bug #1206131)
-- 
cgit v0.10.2


	http://pkgs.fedoraproject.org/cgit/pcre.git/commit/?h=f22&id=f9510ab536fe65b890ad5efeb83f2f0965754388


More information about the scm-commits mailing list