[pcre] Fix case-less match if cases differ in encoding length

Fri Dec 2 10:50:06 UTC 2011

commit 789dda6d1e8ff827e91bb7c614419573eae3620c
Author: Petr Písař <ppisar at redhat.com>
Date:   Fri Dec 2 11:47:19 2011 +0100

    Fix case-less match if cases differ in encoding length

 pcre-8.20-caseless_different_length.patch |  150 +++++++++++++++++++++++++++++
 pcre.spec                                 |    9 ++-
 2 files changed, 158 insertions(+), 1 deletion(-)
---

diff --git a/pcre-8.20-caseless_different_length.patch b/pcre-8.20-caseless_different_length.patch
new file mode 100644
index 0000000..88843e5
--- /dev/null
+++ b/pcre-8.20-caseless_different_length.patch
@@ -0,0 +1,150 @@
+From 72a4bb52e09d46af0b00dd4064f93e9948fdad51 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= <ppisar at redhat.com>
+Date: Fri, 2 Dec 2011 11:36:54 +0100
+Subject: [PATCH] Fix caseless match if cases differ in encoding length
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From:
+r778 | ph10 | 2011-12-01 18:38:47 +0100 (Čt, 01 pro 2011) | 3 lines
+
+Fix bug with caseless matching of characters of different lengths when
+the shorter is right at the end of the subject.
+
+Petr Pisar: Changelog entry removed.
+---
+ pcre_exec.c          |   32 ++++++++++++++++----------------
+ testdata/testinput6  |   14 ++++++++++++++
+ testdata/testoutput6 |   22 ++++++++++++++++++++++
+ 3 files changed, 52 insertions(+), 16 deletions(-)
+
+diff --git a/pcre_exec.c b/pcre_exec.c
+index 2e763d1..9881bdd 100644
+--- a/pcre_exec.c
++++ b/pcre_exec.c
+@@ -427,7 +427,7 @@ returns a negative (error) response, the outer incarnation must also return the
+ same response. */
+ 
+ /* These macros pack up tests that are used for partial matching, and which
+-appears several times in the code. We set the "hit end" flag if the pointer is
++appear several times in the code. We set the "hit end" flag if the pointer is
+ at the end of the subject and also past the start of the subject (i.e.
+ something has been matched). For hard partial matching, we then return
+ immediately. The second one is used when we already know we are past the end of
+@@ -3039,31 +3039,36 @@ for (;;)
+       }
+     break;
+ 
+-    /* Match a single character, caselessly */
++    /* Match a single character, caselessly. If we are at the end of the 
++    subject, give up immediately. */
+ 
+     case OP_CHARI:
++    if (eptr >= md->end_subject)
++      {
++      SCHECK_PARTIAL(); 
++      MRRETURN(MATCH_NOMATCH); 
++      }   
++ 
+ #ifdef SUPPORT_UTF8
+     if (utf8)
+       {
+       length = 1;
+       ecode++;
+       GETCHARLEN(fc, ecode, length);
+-
+-      if (length > md->end_subject - eptr)
+-        {
+-        CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
+-        MRRETURN(MATCH_NOMATCH);
+-        }
+-
++ 
+       /* If the pattern character's value is < 128, we have only one byte, and
+-      can use the fast lookup table. */
++      we know that its other case must also be one byte long, so we can use the
++      fast lookup table. We know that there is at least one byte left in the 
++      subject. */
+ 
+       if (fc < 128)
+         {
+         if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
+         }
+ 
+-      /* Otherwise we must pick up the subject character */
++      /* Otherwise we must pick up the subject character. Note that we cannot
++      use the value of "length" to check for sufficient bytes left, because the
++      other case of the character may have more or fewer bytes.  */
+ 
+       else
+         {
+@@ -3088,11 +3093,6 @@ for (;;)
+ 
+     /* Non-UTF-8 mode */
+       {
+-      if (md->end_subject - eptr < 1)
+-        {
+-        SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
+-        MRRETURN(MATCH_NOMATCH);
+-        }
+       if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
+       ecode += 2;
+       }
+diff --git a/testdata/testinput6 b/testdata/testinput6
+index e5fc0e9..6b0d2f7 100644
+--- a/testdata/testinput6
++++ b/testdata/testinput6
+@@ -802,4 +802,18 @@
+     ** Failers 
+     a\xFCb   
+ 
++/ⱥ/8i
++    ⱥ
++    Ⱥx 
++    Ⱥ 
++
++/[ⱥ]/8i
++    ⱥ
++    Ⱥx 
++    Ⱥ 
++
++/Ⱥ/8i
++    Ⱥ
++    ⱥ
++
+ /-- End of testinput6 --/
+diff --git a/testdata/testoutput6 b/testdata/testoutput6
+index 1acaa23..68c0a46 100644
+--- a/testdata/testoutput6
++++ b/testdata/testoutput6
+@@ -1353,4 +1353,26 @@ No match
+     a\xFCb   
+ No match
+ 
++/ⱥ/8i
++    ⱥ
++ 0: \x{2c65}
++    Ⱥx 
++ 0: \x{23a}
++    Ⱥ 
++ 0: \x{23a}
++
++/[ⱥ]/8i
++    ⱥ
++ 0: \x{2c65}
++    Ⱥx 
++ 0: \x{23a}
++    Ⱥ 
++ 0: \x{23a}
++
++/Ⱥ/8i
++    Ⱥ
++ 0: \x{23a}
++    ⱥ
++ 0: \x{2c65}
++
+ /-- End of testinput6 --/
+-- 
+1.7.7.4
+
diff --git a/pcre.spec b/pcre.spec
index 0bc3fb4..280f0cf 100644
--- a/pcre.spec
+++ b/pcre.spec
@@ -1,7 +1,7 @@
 # This is stable release: %%global rcversion RC3
 Name: pcre
 Version: 8.20
-Release: %{?rcversion:0.}6%{?rcversion:.%rcversion}%{?dist}
+Release: %{?rcversion:0.}7%{?rcversion:.%rcversion}%{?dist}
 %global myversion %{version}%{?rcversion:-%rcversion}
 Summary: Perl-compatible regular expression library
 Group: System Environment/Libraries
@@ -19,6 +19,9 @@ Patch3: pcre-8.20-lookbehind-2.patch
 Patch4: pcre-8.20-forward_reference.patch
 # Fix cache-flush in JIT on PPC, in upstream after 8.20.
 Patch5: pcre-8.20-ppcjit.patch
+# Fix case-less match if cases differ in encoding length, in upstream after
+# 8.20.
+Patch6: pcre-8.20-caseless_different_length.patch
 BuildRequires: readline-devel
 # New libtool to get rid of rpath
 BuildRequires: autoconf, automake, libtool
@@ -63,6 +66,7 @@ libtoolize --copy --force && autoreconf
 %patch3 -p1 -b .lookbehind2
 %patch4 -p0 -b .forward_reference
 %patch5 -p0 -b .ppcjit
+%patch6 -p1 -b .caseless_different_length
 # One contributor's name is non-UTF-8
 for F in ChangeLog; do
     iconv -f latin1 -t utf8 "$F" >"${F}.utf8"
@@ -133,6 +137,9 @@ make check
 %{_mandir}/man1/pcretest.*
 
 %changelog
+* Fri Dec 02 2011 Petr Pisar <ppisar at redhat.com> - 8.20-7
+- Fix case-less match if cases differ in encoding length (bug #756675)
+
 * Fri Nov 25 2011 Petr Pisar <ppisar at redhat.com> - 8.20-6
 - Fix cache-flush in JIT on PPC