[pcre/f15] Fix case-less match if cases differ in encoding length
Petr Pisar
ppisar at fedoraproject.org
Fri Dec 2 12:33:21 UTC 2011
commit 96046ad8df93079b418bc871a0d8bb9215f9d227
Author: Petr Písař <ppisar at redhat.com>
Date: Fri Dec 2 11:47:19 2011 +0100
Fix case-less match if cases differ in encoding length
pcre-8.12-caseless_different_length.patch | 150 +++++++++++++++++++++++++++++
pcre.spec | 9 ++-
2 files changed, 158 insertions(+), 1 deletions(-)
---
diff --git a/pcre-8.12-caseless_different_length.patch b/pcre-8.12-caseless_different_length.patch
new file mode 100644
index 0000000..5cd1513
--- /dev/null
+++ b/pcre-8.12-caseless_different_length.patch
@@ -0,0 +1,150 @@
+From 484e68d7976d2d8ea2988e449e34234e235ce302 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= <ppisar at redhat.com>
+Date: Fri, 2 Dec 2011 13:11:55 +0100
+Subject: [PATCH] Fix caseless match if cases differ in encoding length
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From:
+r778 | ph10 | 2011-12-01 18:38:47 +0100 (Čt, 01 pro 2011) | 3 lines
+
+Fix bug with caseless matching of characters of different lengths when
+the shorter is right at the end of the subject.
+
+Petr Pisar: Changelog entry removed.
+---
+ pcre_exec.c | 32 ++++++++++++++++----------------
+ testdata/testinput6 | 14 ++++++++++++++
+ testdata/testoutput6 | 22 ++++++++++++++++++++++
+ 3 files changed, 52 insertions(+), 16 deletions(-)
+
+diff --git a/pcre_exec.c b/pcre_exec.c
+index caf5fc3..2b7c5bd 100644
+--- a/pcre_exec.c
++++ b/pcre_exec.c
+@@ -432,7 +432,7 @@ returns a negative (error) response, the outer incarnation must also return the
+ same response. */
+
+ /* These macros pack up tests that are used for partial matching, and which
+-appears several times in the code. We set the "hit end" flag if the pointer is
++appear several times in the code. We set the "hit end" flag if the pointer is
+ at the end of the subject and also past the start of the subject (i.e.
+ something has been matched). For hard partial matching, we then return
+ immediately. The second one is used when we already know we are past the end of
+@@ -2743,31 +2743,36 @@ for (;;)
+ }
+ break;
+
+- /* Match a single character, caselessly */
++ /* Match a single character, caselessly. If we are at the end of the
++ subject, give up immediately. */
+
+ case OP_CHARNC:
++ if (eptr >= md->end_subject)
++ {
++ SCHECK_PARTIAL();
++ MRRETURN(MATCH_NOMATCH);
++ }
++
+ #ifdef SUPPORT_UTF8
+ if (utf8)
+ {
+ length = 1;
+ ecode++;
+ GETCHARLEN(fc, ecode, length);
+-
+- if (length > md->end_subject - eptr)
+- {
+- CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
+- MRRETURN(MATCH_NOMATCH);
+- }
+-
++
+ /* If the pattern character's value is < 128, we have only one byte, and
+- can use the fast lookup table. */
++ we know that its other case must also be one byte long, so we can use the
++ fast lookup table. We know that there is at least one byte left in the
++ subject. */
+
+ if (fc < 128)
+ {
+ if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
+ }
+
+- /* Otherwise we must pick up the subject character */
++ /* Otherwise we must pick up the subject character. Note that we cannot
++ use the value of "length" to check for sufficient bytes left, because the
++ other case of the character may have more or fewer bytes. */
+
+ else
+ {
+@@ -2792,11 +2797,6 @@ for (;;)
+
+ /* Non-UTF-8 mode */
+ {
+- if (md->end_subject - eptr < 1)
+- {
+- SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
+- MRRETURN(MATCH_NOMATCH);
+- }
+ if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
+ ecode += 2;
+ }
+diff --git a/testdata/testinput6 b/testdata/testinput6
+index 503a5bc..c92140c 100644
+--- a/testdata/testinput6
++++ b/testdata/testinput6
+@@ -802,4 +802,18 @@
+ ** Failers
+ a\xFCb
+
++/ⱥ/8i
++ ⱥ
++ Ⱥx
++ Ⱥ
++
++/[ⱥ]/8i
++ ⱥ
++ Ⱥx
++ Ⱥ
++
++/Ⱥ/8i
++ Ⱥ
++ ⱥ
++
+ /-- End of testinput6 --/
+diff --git a/testdata/testoutput6 b/testdata/testoutput6
+index 6a9ec83..0ada170 100644
+--- a/testdata/testoutput6
++++ b/testdata/testoutput6
+@@ -1353,4 +1353,26 @@ No match
+ a\xFCb
+ No match
+
++/ⱥ/8i
++ ⱥ
++ 0: \x{2c65}
++ Ⱥx
++ 0: \x{23a}
++ Ⱥ
++ 0: \x{23a}
++
++/[ⱥ]/8i
++ ⱥ
++ 0: \x{2c65}
++ Ⱥx
++ 0: \x{23a}
++ Ⱥ
++ 0: \x{23a}
++
++/Ⱥ/8i
++ Ⱥ
++ 0: \x{23a}
++ ⱥ
++ 0: \x{2c65}
++
+ /-- End of testinput6 --/
+--
+1.7.7.4
+
diff --git a/pcre.spec b/pcre.spec
index 7daf46a..6385605 100644
--- a/pcre.spec
+++ b/pcre.spec
@@ -1,6 +1,6 @@
Name: pcre
Version: 8.12
-Release: 4%{?dist}
+Release: 5%{?dist}
Summary: Perl-compatible regular expression library
URL: http://www.pcre.org/
Source: ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/%{name}-%{version}.tar.bz2
@@ -9,6 +9,9 @@ Patch0: pcre-8.10-multilib.patch
Patch1: pcre-8.12-caseless_reference.patch
# Fix repeated forward reference, in upstream after 8.20.
Patch2: pcre-8.12-forward_reference.patch
+# Fix case-less match if cases differ in encoding length, in upstream after
+# 8.20.
+Patch3: pcre-8.12-caseless_different_length.patch
License: BSD
Group: System Environment/Libraries
BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
@@ -45,6 +48,7 @@ Library for static linking for %{name}.
libtoolize --copy --force && autoreconf
%patch1 -p0 -b .caseless_reference
%patch2 -p1 -b .forward_reference
+%patch3 -p1 -b .caseless_different_length
# One contributor's name is non-UTF-8
for F in ChangeLog; do
iconv -f latin1 -t utf8 "$F" >"${F}.utf8"
@@ -109,6 +113,9 @@ rm -rf $RPM_BUILD_ROOT
%doc COPYING LICENCE
%changelog
+* Fri Dec 02 2011 Petr Pisar <ppisar at redhat.com> - 8.12-5
+- Fix case-less match if cases differ in encoding length (bug #756675)
+
* Tue Nov 22 2011 Petr Pisar <ppisar at redhat.com> - 8.12-4
- Fix repeated forward reference (bug #755969)
More information about the scm-commits mailing list