[grep] Added grep-2.7-const-range-exp.patch

Jaroslav Škarvada jskarvad at fedoraproject.org
Sun Oct 3 20:48:25 UTC 2010


commit fdca3f9a8ff9059993b61917c019deb02411e8d2
Author: Jaroslav Škarvada <jskarvad at redhat.com>
Date:   Sun Oct 3 22:47:56 2010 +0200

    Added grep-2.7-const-range-exp.patch

 grep-2.7-const-range-exp.patch |   64 ++++++++++++++++++++++++++++++++++++++++
 1 files changed, 64 insertions(+), 0 deletions(-)
---
diff --git a/grep-2.7-const-range-exp.patch b/grep-2.7-const-range-exp.patch
new file mode 100644
index 0000000..102bdd8
--- /dev/null
+++ b/grep-2.7-const-range-exp.patch
@@ -0,0 +1,64 @@
+commit 99d3c7e1308beb1ce9a3c535ca4b6581ebd653ee
+Author: Paolo Bonzini <bonzini at gnu.org>
+Date:   Tue Sep 21 17:00:55 2010 +0200
+
+    dfa: process range expressions consistently with system regex
+    
+    The actual meaning of range expressions in glibc is not exactly strcoll,
+    which makes the behavior of grep hard to predict when compiled with the
+    system regex.  Leave to the system regex matcher the decision of which
+    single-byte characters are matched by a range expression.
+    
+    This partially reverts a change made in commit 0d38a8bb (which made
+    sense at the time, but not now that src/dfa.c is not doing multibyte
+    character set matching anymore).
+    
+    * src/dfa.c (in_coll_range): Remove.
+    (parse_bracket_exp): Use system regex to find which single-byte
+    characters match a range expression.
+
+diff --git a/src/dfa.c b/src/dfa.c
+index a2f4174..f3e066f 100644
+--- a/src/dfa.c
++++ b/src/dfa.c
+@@ -697,13 +697,6 @@ static unsigned char const *buf_end;	/* reference to end in dfaexec().  */
+ 
+ #endif /* MBS_SUPPORT */
+ 
+-static int
+-in_coll_range (char ch, char from, char to)
+-{
+-  char c[6] = { from, 0, ch, 0, to, 0 };
+-  return strcoll (&c[0], &c[2]) <= 0 && strcoll (&c[2], &c[4]) <= 0;
+-}
+-
+ typedef int predicate (int);
+ 
+ /* The following list maps the names of the Posix named character classes
+@@ -979,10 +972,22 @@ parse_bracket_exp (void)
+                 for (c = c1; c <= c2; c++)
+                   setbit_case_fold (c, ccl);
+               else
+-                for (c = 0; c < NOTCHAR; ++c)
+-                  if (!(case_fold && isupper (c))
+-                      && in_coll_range (c, c1, c2))
+-                    setbit_case_fold (c, ccl);
++                {
++                  /* Defer to the system regex library about the meaning
++                     of range expressions.  */
++                  regex_t re;
++                  char pattern[6] = { '[', c1, '-', c2, ']', 0 };
++                  char subject[2] = { 0, 0 };
++                  regcomp (&re, pattern, REG_NOSUB);
++                  for (c = 0; c < NOTCHAR; ++c)
++                    {
++                      subject[0] = c;
++                      if (!(case_fold && isupper (c))
++                          && regexec (&re, subject, 0, NULL, 0) != REG_NOMATCH)
++                        setbit_case_fold (c, ccl);
++                    }
++                  regfree (&re);
++                }
+             }
+ 
+           colon_warning_state |= 8;


More information about the scm-commits mailing list