[coreutils] improve i18n support in sort

Kamil Dudka kdudka at fedoraproject.org
Tue Oct 26 17:01:40 UTC 2010


commit bd229edf8d10a3e348fc7661c3638b464bbd7b64
Author: Kamil Dudka <kdudka at redhat.com>
Date:   Tue Oct 26 18:53:51 2010 +0200

    improve i18n support in sort
    
    test misc/sort-debug-keys is now back

 coreutils-i18n.patch |  134 +++++++++++++++++++++++++------------------------
 coreutils.spec       |    5 ++-
 2 files changed, 72 insertions(+), 67 deletions(-)
---
diff --git a/coreutils-i18n.patch b/coreutils-i18n.patch
index 89dde2c..0860224 100644
--- a/coreutils-i18n.patch
+++ b/coreutils-i18n.patch
@@ -1,3 +1,15 @@
+ lib/linebuffer.h  |    8 +
+ src/cut.c         |  420 ++++++++++++++++++++++++++++++--
+ src/expand.c      |  160 ++++++++++++-
+ src/fold.c        |  309 +++++++++++++++++++++--
+ src/join.c        |  347 +++++++++++++++++++++++----
+ src/pr.c          |  431 +++++++++++++++++++++++++++++---
+ src/sort.c        |  704 ++++++++++++++++++++++++++++++++++++++++++++++++++---
+ src/unexpand.c    |  226 +++++++++++++++++-
+ src/uniq.c        |  259 +++++++++++++++++++-
+ tests/Makefile.am |    5 +
+ 10 files changed, 2689 insertions(+), 180 deletions(-)
+
 diff -urNp coreutils-8.6-orig/lib/linebuffer.h coreutils-8.6/lib/linebuffer.h
 --- coreutils-8.6-orig/lib/linebuffer.h	2010-06-10 18:45:26.000000000 +0200
 +++ coreutils-8.6/lib/linebuffer.h	2010-10-18 15:18:11.932209034 +0200
@@ -2417,9 +2429,10 @@ diff -urNp coreutils-8.6-orig/src/pr.c coreutils-8.6/src/pr.c
  /* We've just printed some files and need to clean up things before
     looking for more options and printing the next batch of files.
  
-diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
---- coreutils-8.6-orig/src/sort.c	2010-10-14 11:39:14.000000000 +0200
-+++ coreutils-8.6/src/sort.c	2010-10-18 15:16:14.976458929 +0200
+diff --git a/src/sort.c b/src/sort.c
+index 7e25f6a..d3f8915 100644
+--- a/src/sort.c
++++ b/src/sort.c
 @@ -22,11 +22,20 @@
  
  #include <config.h>
@@ -2498,7 +2511,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
  
  /* Flag to remove consecutive duplicate lines from the output.
     Only the last of a sequence of equal lines will be output. */
-@@ -782,6 +813,44 @@ reap_some (void)
+@@ -782,6 +813,46 @@ reap_some (void)
      update_proc (pid);
  }
  
@@ -2509,6 +2522,8 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
 +(*begfield) (const struct line*, const struct keyfield *);
 +static char *
 +(*limfield) (const struct line*, const struct keyfield *);
++static void
++(*skipblanks) (const char **ptr, const char *lim);
 +static int
 +(*getmonth) (char const *, size_t, char **);
 +static int
@@ -2543,7 +2558,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
  /* Clean up any remaining temporary files.  */
  
  static void
-@@ -1205,7 +1274,7 @@ zaptemp (char const *name)
+@@ -1205,7 +1276,7 @@ zaptemp (char const *name)
    free (node);
  }
  
@@ -2552,7 +2567,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
  
  static int
  struct_month_cmp (void const *m1, void const *m2)
-@@ -1220,7 +1289,7 @@ struct_month_cmp (void const *m1, void c
+@@ -1220,7 +1291,7 @@ struct_month_cmp (void const *m1, void const *m2)
  /* Initialize the character class tables. */
  
  static void
@@ -2561,7 +2576,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
  {
    size_t i;
  
-@@ -1232,7 +1301,7 @@ inittables (void)
+@@ -1232,7 +1303,7 @@ inittables (void)
        fold_toupper[i] = toupper (i);
      }
  
@@ -2570,7 +2585,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
    /* If we're not in the "C" locale, read different names for months.  */
    if (hard_LC_TIME)
      {
-@@ -1314,6 +1383,64 @@ specify_nmerge (int oi, char c, char con
+@@ -1314,6 +1385,64 @@ specify_nmerge (int oi, char c, char const *s)
      xstrtol_fatal (e, oi, c, long_options, s);
  }
  
@@ -2635,7 +2650,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
  /* Specify the amount of main memory to use when sorting.  */
  static void
  specify_sort_size (int oi, char c, char const *s)
-@@ -1540,7 +1667,7 @@ buffer_linelim (struct buffer const *buf
+@@ -1540,7 +1669,7 @@ buffer_linelim (struct buffer const *buf)
     by KEY in LINE. */
  
  static char *
@@ -2644,7 +2659,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
  {
    char *ptr = line->text, *lim = ptr + line->length - 1;
    size_t sword = key->sword;
-@@ -1549,10 +1676,10 @@ begfield (struct line const *line, struc
+@@ -1549,10 +1678,10 @@ begfield (struct line const *line, struct keyfield const *key)
    /* The leading field separator itself is included in a field when -t
       is absent.  */
  
@@ -2657,7 +2672,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
            ++ptr;
          if (ptr < lim)
            ++ptr;
-@@ -1578,11 +1705,70 @@ begfield (struct line const *line, struc
+@@ -1578,11 +1707,70 @@ begfield (struct line const *line, struct keyfield const *key)
    return ptr;
  }
  
@@ -2729,7 +2744,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
  {
    char *ptr = line->text, *lim = ptr + line->length - 1;
    size_t eword = key->eword, echar = key->echar;
-@@ -1597,10 +1783,10 @@ limfield (struct line const *line, struc
+@@ -1597,10 +1785,10 @@ limfield (struct line const *line, struct keyfield const *key)
       `beginning' is the first character following the delimiting TAB.
       Otherwise, leave PTR pointing at the first `blank' character after
       the preceding field.  */
@@ -2742,7 +2757,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
            ++ptr;
          if (ptr < lim && (eword || echar))
            ++ptr;
-@@ -1646,10 +1832,10 @@ limfield (struct line const *line, struc
+@@ -1646,10 +1834,10 @@ limfield (struct line const *line, struct keyfield const *key)
       */
  
    /* Make LIM point to the end of (one byte past) the current field.  */
@@ -2755,7 +2770,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
        if (newlim)
          lim = newlim;
      }
-@@ -1680,6 +1866,113 @@ limfield (struct line const *line, struc
+@@ -1680,6 +1868,130 @@ limfield (struct line const *line, struct keyfield const *key)
    return ptr;
  }
  
@@ -2866,10 +2881,27 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
 +}
 +#endif
 +
++static void
++skipblanks_uni (const char **ptr, const char *lim)
++{
++  while (*ptr < lim && blanks[to_uchar (**ptr)])
++    ++(*ptr);
++}
++
++#if HAVE_MBRTOWC
++static void
++skipblanks_mb (const char **ptr, const char *lim)
++{
++  size_t mblength;
++  while (*ptr < lim && ismbblank (*ptr, lim - *ptr, &mblength))
++    (*ptr) += mblength;
++}
++#endif
++
  /* Fill BUF reading from FP, moving buf->left bytes from the end
     of buf->buf to the beginning first.  If EOF is reached and the
     file wasn't terminated by a newline, supply one.  Set up BUF's line
-@@ -1766,8 +2059,24 @@ fillbuf (struct buffer *buf, FILE *fp, c
+@@ -1766,8 +2078,22 @@ fillbuf (struct buffer *buf, FILE *fp, char const *file)
                    else
                      {
                        if (key->skipsblanks)
@@ -2880,8 +2912,6 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
 +                          if (MB_CUR_MAX > 1)
 +                            {
 +                              size_t mblength;
-+                              mbstate_t state;
-+                              memset (&state, '\0', sizeof(mbstate_t));
 +                              while (line_start < line->keylim &&
 +                                     ismbblank (line_start,
 +                                                line->keylim - line_start,
@@ -2896,7 +2926,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
                        line->keybeg = line_start;
                      }
                  }
-@@ -1888,7 +2197,7 @@ human_numcompare (char const *a, char co
+@@ -1888,7 +2214,7 @@ human_numcompare (char const *a, char const *b)
     hideously fast. */
  
  static int
@@ -2905,7 +2935,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
  {
    while (blanks[to_uchar (*a)])
      a++;
-@@ -1898,6 +2207,25 @@ numcompare (char const *a, char const *b
+@@ -1898,6 +2224,25 @@ numcompare (char const *a, char const *b)
    return strnumcmp (a, b, decimal_point, thousands_sep);
  }
  
@@ -2931,7 +2961,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
  static int
  general_numcompare (char const *sa, char const *sb)
  {
-@@ -1930,7 +2258,7 @@ general_numcompare (char const *sa, char
+@@ -1930,7 +2275,7 @@ general_numcompare (char const *sa, char const *sb)
     Return 0 if the name in S is not recognized.  */
  
  static int
@@ -2940,7 +2970,14 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
  {
    size_t lo = 0;
    size_t hi = MONTHS_PER_YEAR;
-@@ -2210,7 +2538,7 @@ debug_key (struct line const *line, stru
+@@ -2204,13 +2549,12 @@ debug_key (struct line const *line, struct keyfield const *key)
+         {
+           char saved = *lim; *lim = '\0';
+ 
+-          while (blanks[to_uchar (*beg)])
+-            beg++;
++          skipblanks (&beg, lim);
+ 
            char *tighter_lim = beg;
  
            if (key->month)
@@ -2949,7 +2986,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
            else if (key->general_numeric)
              ignore_value (strtold (beg, &tighter_lim));
            else if (key->numeric || key->human_numeric)
-@@ -2354,7 +2682,7 @@ key_warnings (struct keyfield const *gke
+@@ -2354,7 +2698,7 @@ key_warnings (struct keyfield const *gkey, bool gkey_only)
        bool maybe_space_aligned = !hard_LC_COLLATE && default_key_compare (key)
                                   && !(key->schar || key->echar);
        bool line_offset = key->eword == 0 && key->echar != 0; /* -k1.x,1.y  */
@@ -2958,7 +2995,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
            && ((!key->skipsblanks && !(implicit_skip || maybe_space_aligned))
                || (!key->skipsblanks && key->schar)
                || (!key->skipeblanks && key->echar)))
-@@ -2412,11 +2740,83 @@ key_warnings (struct keyfield const *gke
+@@ -2412,11 +2756,83 @@ key_warnings (struct keyfield const *gkey, bool gkey_only)
      error (0, 0, _("option `-r' only applies to last-resort comparison"));
  }
  
@@ -3043,7 +3080,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
  {
    struct keyfield *key = keylist;
  
-@@ -2501,7 +2898,7 @@ keycompare (struct line const *a, struct
+@@ -2501,7 +2917,7 @@ keycompare (struct line const *a, struct line const *b)
            else if (key->human_numeric)
              diff = human_numcompare (ta, tb);
            else if (key->month)
@@ -3052,7 +3089,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
            else if (key->random)
              diff = compare_random (ta, tlena, tb, tlenb);
            else if (key->version)
-@@ -2617,6 +3014,179 @@ keycompare (struct line const *a, struct
+@@ -2617,6 +3033,179 @@ keycompare (struct line const *a, struct line const *b)
    return key->reverse ? -diff : diff;
  }
  
@@ -3232,7 +3269,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
  /* Compare two lines A and B, returning negative, zero, or positive
     depending on whether A compares less than, equal to, or greater than B. */
  
-@@ -4006,7 +4576,7 @@ main (int argc, char **argv)
+@@ -4006,7 +4595,7 @@ main (int argc, char **argv)
    initialize_exit_failure (SORT_FAILURE);
  
    hard_LC_COLLATE = hard_locale (LC_COLLATE);
@@ -3241,7 +3278,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
    hard_LC_TIME = hard_locale (LC_TIME);
  #endif
  
-@@ -4027,6 +4597,27 @@ main (int argc, char **argv)
+@@ -4027,6 +4616,29 @@ main (int argc, char **argv)
        thousands_sep = -1;
    }
  
@@ -3251,6 +3288,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
 +      inittables = inittables_mb;
 +      begfield = begfield_mb;
 +      limfield = limfield_mb;
++      skipblanks = skipblanks_mb;
 +      getmonth = getmonth_mb;
 +      keycompare = keycompare_mb;
 +      numcompare = numcompare_mb;
@@ -3261,6 +3299,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
 +      inittables = inittables_uni;
 +      begfield = begfield_uni;
 +      limfield = limfield_uni;
++      skipblanks = skipblanks_uni;
 +      getmonth = getmonth_uni;
 +      keycompare = keycompare_uni;
 +      numcompare = numcompare_uni;
@@ -3269,7 +3308,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
    have_read_stdin = false;
    inittables ();
  
-@@ -4297,13 +4888,35 @@ main (int argc, char **argv)
+@@ -4297,13 +4909,34 @@ main (int argc, char **argv)
  
          case 't':
            {
@@ -3286,7 +3325,6 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
 +              {
 +                wchar_t wc;
 +                mbstate_t state;
-+                size_t i;
 +
 +                memset (&state, '\0', sizeof (mbstate_t));
 +                newtab_length = mbrtowc (&wc, newtab, strnlen (newtab,
@@ -3309,7 +3347,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
                  else
                    {
                      /* Provoke with `sort -txx'.  Complain about
-@@ -4314,9 +4927,12 @@ main (int argc, char **argv)
+@@ -4314,9 +4947,12 @@ main (int argc, char **argv)
                             quote (optarg));
                    }
                }
@@ -4086,39 +4124,3 @@ diff -urNp coreutils-8.6-orig/tests/misc/sort-mb-tests coreutils-8.6/tests/misc/
 +fi
 +test $errors = 0 || errors=1
 +exit $errors
-diff -urNp coreutils-8.6-orig/tests/misc/sort-debug-keys coreutils-8.6/tests/misc/sort-debug-keys
---- coreutils-8.6-orig/tests/misc/sort-debug-keys	2010-10-11 19:35:11.000000000 +0200
-+++ coreutils-8.6/tests/misc/sort-debug-keys	2010-10-19 14:55:55.435692063 +0200
-@@ -305,18 +305,19 @@ _____
- ___________________
- EOF
- 
--: ${LOCALE_FR_UTF8=none}
--if test "$LOCALE_FR_UTF8" != "none"; then
--  (
--  echo '   1²---++3   1,234  Mi' |
--    LC_ALL=C sort --debug -k2g -k1b,1
--  echo '   1²---++3   1,234  Mi' |
--    LC_ALL=$LOCALE_FR_UTF8 sort --debug -k2g -k1b,1
--  echo '+1234 1234Gi 1,234M' |
--    LC_ALL=$LOCALE_FR_UTF8 sort --debug -k1,1n -k1,1g \
--    -k1,1h -k2,2n -k2,2g -k2,2h -k3,3n -k3,3g -k3,3h
--  ) > out
--  compare out exp || fail=1
--fi
-+#temporarily disable sort debug-keys test for mbyte locales (doesn't work atm.)
-+#: ${LOCALE_FR_UTF8=none}
-+#if test "$LOCALE_FR_UTF8" != "none"; then
-+#  (
-+#  echo '   1²---++3   1,234  Mi' |
-+#    LC_ALL=C sort --debug -k2g -k1b,1
-+#  echo '   1²---++3   1,234  Mi' |
-+#    LC_ALL=$LOCALE_FR_UTF8 sort --debug -k2g -k1b,1
-+#  echo '+1234 1234Gi 1,234M' |
-+#    LC_ALL=$LOCALE_FR_UTF8 sort --debug -k1,1n -k1,1g \
-+#    -k1,1h -k2,2n -k2,2g -k2,2h -k3,3n -k3,3g -k3,3h
-+#  ) > out
-+#  compare out exp || fail=1
-+#fi
- 
- Exit $fail
diff --git a/coreutils.spec b/coreutils.spec
index 08532d3..baae402 100644
--- a/coreutils.spec
+++ b/coreutils.spec
@@ -1,7 +1,7 @@
 Summary: A set of basic GNU tools commonly used in shell scripts
 Name:    coreutils
 Version: 8.6
-Release: 1%{?dist}
+Release: 2%{?dist}
 License: GPLv3+
 Group:   System Environment/Base
 Url:     http://www.gnu.org/software/coreutils/
@@ -336,6 +336,9 @@ fi
 %{_libdir}/coreutils
 
 %changelog
+* Tue Oct 26 2010 Kamil Dudka <kdudka at redhat.com> - 8.6-2
+- improve i18n support in sort (debug-keys test is now back)
+
 * Wed Oct 20 2010 Ondrej Vasik <ovasik at redhat.com> - 8.6-1
 - new upstream release 8.6
 - remove applied patches, temporarily disable sort


More information about the scm-commits mailing list