[perl/f16] Match starting byte in non-UTF-8 mode

Petr Pisar ppisar at fedoraproject.org
Tue Sep 11 16:40:07 UTC 2012


commit adae00b1b7664a80ab54c820762ecea45a4fb7b2
Author: Petr Písař <ppisar at redhat.com>
Date:   Tue Sep 11 16:23:26 2012 +0200

    Match starting byte in non-UTF-8 mode

 ...erl-101710-Regression-with-i-latin1-chars.patch |   83 ++++++++++++++++++++
 perl.spec                                          |    6 ++
 2 files changed, 89 insertions(+), 0 deletions(-)
---
diff --git a/perl-5.14.2-PATCH-perl-101710-Regression-with-i-latin1-chars.patch b/perl-5.14.2-PATCH-perl-101710-Regression-with-i-latin1-chars.patch
new file mode 100644
index 0000000..2cbba13
--- /dev/null
+++ b/perl-5.14.2-PATCH-perl-101710-Regression-with-i-latin1-chars.patch
@@ -0,0 +1,83 @@
+From 4cf15b2d9def61a3bb49ac9b8729179fb6ce17c6 Mon Sep 17 00:00:00 2001
+From: Karl Williamson <public at khwilliamson.com>
+Date: Tue, 1 Nov 2011 17:57:15 -0600
+Subject: [PATCH] PATCH: [perl #101710] Regression with /i, latin1 chars.
+
+Petr Pisar: Ported to 5.14.2:
+
+From bbdd8bad57f8d77a4e6c3725a49d4d3589efedd7 Mon Sep 17 00:00:00 2001
+From: Karl Williamson <public at khwilliamson.com>
+Date: Tue, 1 Nov 2011 17:57:15 -0600
+Subject: [PATCH] PATCH: [perl #101710] Regression with /i, latin1 chars.
+
+The root cause of this bug is that the code assumed the target string
+was in UTF-8 when it wasn't, so it mistook an ordinary byte for a UTF-8
+start byte and skipped ahead by the length that byte implied.
+---
+ pod/perldelta.pod | 8 ++++++++
+ regexec.c         | 2 +-
+ t/re/pat.t        | 9 ++++++++-
+ 3 files changed, 17 insertions(+), 2 deletions(-)
+
+diff --git a/pod/perldelta.pod b/pod/perldelta.pod
+index 425708f..55e6e27 100644
+--- a/pod/perldelta.pod
++++ b/pod/perldelta.pod
+@@ -168,6 +168,14 @@ A panic involving the combination of the regular expression modifiers
+ C</aa> introduced in 5.14.0 and the C<\b> escape sequence has been
+ fixed [perl #95964].
+ 
++=item *
++
++A regression has been fixed that was introduced in 5.14, in C</i>
++regular expression matching, in which a match improperly fails if the
++pattern is in UTF-8, the target string is not, and a Latin-1 character
++precedes a character in the string that should match the pattern.  [perl
++#101710]
++
+ =back
+ 
+ =head1 Known Problems
+diff --git a/regexec.c b/regexec.c
+index 0dc093f..2354be1 100644
+--- a/regexec.c
++++ b/regexec.c
+@@ -1521,7 +1521,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
+ 		{
+ 		    goto got_it;
+ 		}
+-		s += UTF8SKIP(s);
++		s += (utf8_target) ? UTF8SKIP(s) : 1;
+ 	    }
+ 	    break;
+ 	case BOUNDL:
+diff --git a/t/re/pat.t b/t/re/pat.t
+index 4ef9663..4eb05c6 100644
+--- a/t/re/pat.t
++++ b/t/re/pat.t
+@@ -21,7 +21,7 @@ BEGIN {
+     require './test.pl';
+ }
+ 
+-plan tests => 451;  # Update this when adding/deleting tests.
++plan tests => 452;  # Update this when adding/deleting tests.
+ 
+ run_tests() unless caller;
+ 
+@@ -1167,6 +1167,13 @@ sub run_tests {
+         is($got,$want,'RT #84294: check that "ab" =~ /((\w+)(?{ push @got, $2 })){2}/ leaves @got in the correct state');
+     }
+ 
++
++    { # [perl #101710]
++        my $pat = "b";
++        utf8::upgrade($pat);
++        like("\xffb", qr/$pat/i, "/i: utf8 pattern, non-utf8 string, latin1-char preceding matching char in string");
++    }
++
+ } # End of sub run_tests
+ 
+ 1;
+-- 
+1.7.11.4
+
diff --git a/perl.spec b/perl.spec
index 63b97a4..fe8012c 100644
--- a/perl.spec
+++ b/perl.spec
@@ -102,6 +102,9 @@ Patch17:        perl-5.14.2-RT-113730-should-be-cleared-on-do-IO-error.patch
 # Do not truncate syscall() return value to 32 bits, rhbz#838551, RT#113980
 Patch18:        perl-5.16.1-perl-113980-pp_syscall-I32-retval-truncates-the-retu.patch
 
+# Match starting byte in non-UTF-8 mode, rhbz#801739, RT#101710
+Patch19:        perl-5.14.2-PATCH-perl-101710-Regression-with-i-latin1-chars.patch
+
 # Update some of the bundled modules
 # see http://fedoraproject.org/wiki/Perl/perl.spec for instructions
 
@@ -1152,6 +1155,7 @@ tarball from perl.org.
 %patch16 -p1
 %patch17 -p1
 %patch18 -p1
+%patch19 -p1
 
 #copy the example script
 cp -a %{SOURCE5} .
@@ -1351,6 +1355,7 @@ pushd %{build_archlib}/CORE/
     'Fedora Patch16: Fix find2perl to translate ? glob properly (RT#113054)' \
     'Fedora Patch17: Clear $@ before "do" I/O error (RT#113730)' \
     'Fedora Patch18: Do not truncate syscall() return value to 32 bits (RT#113980)' \
+    'Fedora Patch19: Match starting byte in non-UTF-8 mode (RT#101710)' \
     %{nil}
 
 rm patchlevel.bak
@@ -2243,6 +2248,7 @@ sed \
 * Tue Sep 11 2012 Petr Pisar <ppisar at redhat.com> - 4:5.14.2-200
 - Clear $@ before `do' I/O error (bug #834226)
 - Do not truncate syscall() return value to 32 bits (bug #838551)
+- Match starting byte in non-UTF-8 mode (bug #801739)
 
 * Wed Sep 05 2012 Petr Pisar <ppisar at redhat.com> - 4:5.14.2-199
 - Remove perl-devel dependency from perl-Test-Harness and perl-Test-Simple


More information about the scm-commits mailing list