[perl] Match non-breakable space with /[\h]/ in ASCII mode

Petr Pisar ppisar at fedoraproject.org
Tue Sep 11 12:35:31 UTC 2012


commit 01108a6d354e3dc615f808c3f0c21a179cab35aa
Author: Petr Písař <ppisar at redhat.com>
Date:   Tue Sep 11 09:53:31 2012 +0200

    Match non-breakable space with /[\h]/ in ASCII mode

 ...5.16.1-PATCH-perl-114220-h-not-equiv-to-h.patch |   83 ++++++++++++++++++++
 perl.spec                                          |    6 ++
 2 files changed, 89 insertions(+), 0 deletions(-)
---
diff --git a/perl-5.16.1-PATCH-perl-114220-h-not-equiv-to-h.patch b/perl-5.16.1-PATCH-perl-114220-h-not-equiv-to-h.patch
new file mode 100644
index 0000000..8d23010
--- /dev/null
+++ b/perl-5.16.1-PATCH-perl-114220-h-not-equiv-to-h.patch
@@ -0,0 +1,83 @@
+From ee895e3e46998560ad9b18ffcaf5852b1fbc5403 Mon Sep 17 00:00:00 2001
+From: Karl Williamson <public at khwilliamson.com>
+Date: Sat, 21 Jul 2012 12:12:33 -0600
+Subject: [PATCH] PATCH: [perl #114220] /\h/ not equiv to /[\h]/
+
+\h matches the No-Break space even under /d.  It is the only
+(non-complemented) Posix-like character class that has matches under /d
+in the Latin1 range above ASCII.  A special case is made for it, and \H
+to make sure they have the correct code points.
+---
+ regcomp.c     | 27 ++++++++++++++++++++++-----
+ t/re/re_tests |  5 +++++
+ 2 files changed, 27 insertions(+), 5 deletions(-)
+
+diff --git a/regcomp.c b/regcomp.c
+index 143f349..2254159 100644
+--- a/regcomp.c
++++ b/regcomp.c
+@@ -10885,6 +10885,7 @@ parseit:
+ 		 * A similar issue a little earlier when switching on value.
+ 		 * --jhi */
+ 		switch ((I32)namedclass) {
++		    int i;  /* loop counter */
+ 
+ 		case ANYOF_ALNUMC: /* C's alnum, in contrast to \w */
+ 		    DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+@@ -10957,16 +10958,32 @@ parseit:
+                         PL_PosixGraph, PL_L1PosixGraph, "XPosixGraph", listsv);
+ 		    break;
+ 		case ANYOF_HORIZWS:
+-		    /* For these, we use the nonbitmap, as /d doesn't make a
+-		     * difference in what these match.  There would be problems
+-		     * if these characters had folds other than themselves, as
+-		     * nonbitmap is subject to folding.  It turns out that \h
+-		     * is just a synonym for XPosixBlank */
++		    /* NBSP matches this, and needs to be added unconditionally
++		     * to the bit map as it matches even under /d, unlike all
++		     * the rest of the Posix-like classes (\v doesn't have any
+		     * matches in the Latin1 range, so it is unaffected.)
++		     * Otherwise, we use the nonbitmap, as /d doesn't make a
++		     * difference in what these match.  It turns out that \h is
++		     * just a synonym for XPosixBlank */
+ 		    _invlist_union(nonbitmap, PL_XPosixBlank, &nonbitmap);
++		    stored += set_regclass_bit(pRExC_state, ret,
++					       UNI_TO_NATIVE(0xA0),
++					       &l1_fold_invlist,
++					       &unicode_alternate);
++
+ 		    break;
+ 		case ANYOF_NHORIZWS:
+                     _invlist_union_complement_2nd(nonbitmap,
+                                                  PL_XPosixBlank, &nonbitmap);
++		    for (i = 128; i < 256; i++) {
++			if (i == 0xA0) {
++			    continue;
++			}
++			stored += set_regclass_bit(pRExC_state, ret,
++						   UNI_TO_NATIVE(i),
++						   &l1_fold_invlist,
++						   &unicode_alternate);
++		    }
+ 		    break;
+ 		case ANYOF_LOWER:
+ 		case ANYOF_NLOWER:
+diff --git a/t/re/re_tests b/t/re/re_tests
+index 4d78a6a..cfc813f 100644
+--- a/t/re/re_tests
++++ b/t/re/re_tests
+@@ -1595,6 +1595,11 @@ abc\N{def	-	c	-	\\N{NAME} must be resolved by the lexer
+ /[[:word:]]/	\x{2c1}	y	-	-
+ /[[:word:]]/	\x{2c2}	n	-	-
+ 
++# [perl #114220]
++/[\h]/	\x{A0}	y	$&	\xA0
++/[\H]/	\x{BF}	y	$&	\xBF
++/[\H]/	\x{A0}	n	-	-
++/[\H]/	\x{A1}	y	$&	\xA1
+ 
+ 
+ # vim: softtabstop=0 noexpandtab
+-- 
+1.7.11.4
+
diff --git a/perl.spec b/perl.spec
index 0eb69f3..f12d2ab 100644
--- a/perl.spec
+++ b/perl.spec
@@ -85,6 +85,9 @@ Patch10:        perl-5.16.0-fix-broken-atof.patch
 # Do not access freed memory when cloning thread, rhbz#825749, RT#111610
 Patch11:        perl-5.16.1-perl-111610-Trouble-with-XS-APItest-t-clone-with-sta.patch
 
+# Match non-breakable space with /[\h]/ in ASCII mode, rhbz#844919, RT#114220
+Patch12:        perl-5.16.1-PATCH-perl-114220-h-not-equiv-to-h.patch
+
 # Fix searching for Unicode::Collate::Locale data, rhbz#756118, CPANRT#72666,
 # fixed in Unicode-Collate-0.87.
 # TODO Looks like it was fixed differently?
@@ -1323,6 +1326,7 @@ tarball from perl.org.
 %patch9 -p1
 %patch10 -p1
 %patch11 -p1
+%patch12 -p1
 
 #copy the example script
 cp -a %{SOURCE5} .
@@ -1526,6 +1530,7 @@ pushd %{build_archlib}/CORE/
     'Fedora Patch9: Fix find2perl to translate ? glob properly (RT#113054)' \
     'Fedora Patch10: Fix broken atof (RT#109318)' \
     'Fedora Patch11: Do not access freed memory when cloning thread (RT#111610)' \
+    'Fedora Patch12: Match non-breakable space with /[\h]/ in ASCII mode (RT#114220)' \
     %{nil}
 
 rm patchlevel.bak
@@ -2639,6 +2644,7 @@ sed \
 %changelog
 * Tue Sep 11 2012 Petr Pisar <ppisar at redhat.com> - 4:5.16.1-233
 - Do not access freed memory when cloning thread (bug #825749)
+- Match non-breakable space with /[\h]/ in ASCII mode (bug #844919)
 
 * Wed Sep 05 2012 Petr Pisar <ppisar at redhat.com> - 4:5.16.1-232
 - Move App::Cpan from perl-Test-Harness to perl-CPAN (bug #854577)



More information about the perl-devel mailing list