[coreutils] improve i18n support in sort
Kamil Dudka
kdudka at fedoraproject.org
Tue Oct 26 17:01:40 UTC 2010
commit bd229edf8d10a3e348fc7661c3638b464bbd7b64
Author: Kamil Dudka <kdudka at redhat.com>
Date: Tue Oct 26 18:53:51 2010 +0200
improve i18n support in sort
the misc/sort-debug-keys test is now re-enabled
coreutils-i18n.patch | 134 +++++++++++++++++++++++++------------------------
coreutils.spec | 5 ++-
2 files changed, 72 insertions(+), 67 deletions(-)
---
diff --git a/coreutils-i18n.patch b/coreutils-i18n.patch
index 89dde2c..0860224 100644
--- a/coreutils-i18n.patch
+++ b/coreutils-i18n.patch
@@ -1,3 +1,15 @@
+ lib/linebuffer.h | 8 +
+ src/cut.c | 420 ++++++++++++++++++++++++++++++--
+ src/expand.c | 160 ++++++++++++-
+ src/fold.c | 309 +++++++++++++++++++++--
+ src/join.c | 347 +++++++++++++++++++++++----
+ src/pr.c | 431 +++++++++++++++++++++++++++++---
+ src/sort.c | 704 ++++++++++++++++++++++++++++++++++++++++++++++++++---
+ src/unexpand.c | 226 +++++++++++++++++-
+ src/uniq.c | 259 +++++++++++++++++++-
+ tests/Makefile.am | 5 +
+ 10 files changed, 2689 insertions(+), 180 deletions(-)
+
diff -urNp coreutils-8.6-orig/lib/linebuffer.h coreutils-8.6/lib/linebuffer.h
--- coreutils-8.6-orig/lib/linebuffer.h 2010-06-10 18:45:26.000000000 +0200
+++ coreutils-8.6/lib/linebuffer.h 2010-10-18 15:18:11.932209034 +0200
@@ -2417,9 +2429,10 @@ diff -urNp coreutils-8.6-orig/src/pr.c coreutils-8.6/src/pr.c
/* We've just printed some files and need to clean up things before
looking for more options and printing the next batch of files.
-diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
---- coreutils-8.6-orig/src/sort.c 2010-10-14 11:39:14.000000000 +0200
-+++ coreutils-8.6/src/sort.c 2010-10-18 15:16:14.976458929 +0200
+diff --git a/src/sort.c b/src/sort.c
+index 7e25f6a..d3f8915 100644
+--- a/src/sort.c
++++ b/src/sort.c
@@ -22,11 +22,20 @@
#include <config.h>
@@ -2498,7 +2511,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
/* Flag to remove consecutive duplicate lines from the output.
Only the last of a sequence of equal lines will be output. */
-@@ -782,6 +813,44 @@ reap_some (void)
+@@ -782,6 +813,46 @@ reap_some (void)
update_proc (pid);
}
@@ -2509,6 +2522,8 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
+(*begfield) (const struct line*, const struct keyfield *);
+static char *
+(*limfield) (const struct line*, const struct keyfield *);
++static void
++(*skipblanks) (const char **ptr, const char *lim);
+static int
+(*getmonth) (char const *, size_t, char **);
+static int
@@ -2543,7 +2558,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
/* Clean up any remaining temporary files. */
static void
-@@ -1205,7 +1274,7 @@ zaptemp (char const *name)
+@@ -1205,7 +1276,7 @@ zaptemp (char const *name)
free (node);
}
@@ -2552,7 +2567,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
static int
struct_month_cmp (void const *m1, void const *m2)
-@@ -1220,7 +1289,7 @@ struct_month_cmp (void const *m1, void c
+@@ -1220,7 +1291,7 @@ struct_month_cmp (void const *m1, void const *m2)
/* Initialize the character class tables. */
static void
@@ -2561,7 +2576,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
{
size_t i;
-@@ -1232,7 +1301,7 @@ inittables (void)
+@@ -1232,7 +1303,7 @@ inittables (void)
fold_toupper[i] = toupper (i);
}
@@ -2570,7 +2585,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
/* If we're not in the "C" locale, read different names for months. */
if (hard_LC_TIME)
{
-@@ -1314,6 +1383,64 @@ specify_nmerge (int oi, char c, char con
+@@ -1314,6 +1385,64 @@ specify_nmerge (int oi, char c, char const *s)
xstrtol_fatal (e, oi, c, long_options, s);
}
@@ -2635,7 +2650,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
/* Specify the amount of main memory to use when sorting. */
static void
specify_sort_size (int oi, char c, char const *s)
-@@ -1540,7 +1667,7 @@ buffer_linelim (struct buffer const *buf
+@@ -1540,7 +1669,7 @@ buffer_linelim (struct buffer const *buf)
by KEY in LINE. */
static char *
@@ -2644,7 +2659,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
{
char *ptr = line->text, *lim = ptr + line->length - 1;
size_t sword = key->sword;
-@@ -1549,10 +1676,10 @@ begfield (struct line const *line, struc
+@@ -1549,10 +1678,10 @@ begfield (struct line const *line, struct keyfield const *key)
/* The leading field separator itself is included in a field when -t
is absent. */
@@ -2657,7 +2672,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
++ptr;
if (ptr < lim)
++ptr;
-@@ -1578,11 +1705,70 @@ begfield (struct line const *line, struc
+@@ -1578,11 +1707,70 @@ begfield (struct line const *line, struct keyfield const *key)
return ptr;
}
@@ -2729,7 +2744,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
{
char *ptr = line->text, *lim = ptr + line->length - 1;
size_t eword = key->eword, echar = key->echar;
-@@ -1597,10 +1783,10 @@ limfield (struct line const *line, struc
+@@ -1597,10 +1785,10 @@ limfield (struct line const *line, struct keyfield const *key)
`beginning' is the first character following the delimiting TAB.
Otherwise, leave PTR pointing at the first `blank' character after
the preceding field. */
@@ -2742,7 +2757,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
++ptr;
if (ptr < lim && (eword || echar))
++ptr;
-@@ -1646,10 +1832,10 @@ limfield (struct line const *line, struc
+@@ -1646,10 +1834,10 @@ limfield (struct line const *line, struct keyfield const *key)
*/
/* Make LIM point to the end of (one byte past) the current field. */
@@ -2755,7 +2770,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
if (newlim)
lim = newlim;
}
-@@ -1680,6 +1866,113 @@ limfield (struct line const *line, struc
+@@ -1680,6 +1868,130 @@ limfield (struct line const *line, struct keyfield const *key)
return ptr;
}
@@ -2866,10 +2881,27 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
+}
+#endif
+
++static void
++skipblanks_uni (const char **ptr, const char *lim)
++{
++ while (*ptr < lim && blanks[to_uchar (**ptr)])
++ ++(*ptr);
++}
++
++#if HAVE_MBRTOWC
++static void
++skipblanks_mb (const char **ptr, const char *lim)
++{
++ size_t mblength;
++ while (*ptr < lim && ismbblank (*ptr, lim - *ptr, &mblength))
++ (*ptr) += mblength;
++}
++#endif
++
/* Fill BUF reading from FP, moving buf->left bytes from the end
of buf->buf to the beginning first. If EOF is reached and the
file wasn't terminated by a newline, supply one. Set up BUF's line
-@@ -1766,8 +2059,24 @@ fillbuf (struct buffer *buf, FILE *fp, c
+@@ -1766,8 +2078,22 @@ fillbuf (struct buffer *buf, FILE *fp, char const *file)
else
{
if (key->skipsblanks)
@@ -2880,8 +2912,6 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
+ if (MB_CUR_MAX > 1)
+ {
+ size_t mblength;
-+ mbstate_t state;
-+ memset (&state, '\0', sizeof(mbstate_t));
+ while (line_start < line->keylim &&
+ ismbblank (line_start,
+ line->keylim - line_start,
@@ -2896,7 +2926,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
line->keybeg = line_start;
}
}
-@@ -1888,7 +2197,7 @@ human_numcompare (char const *a, char co
+@@ -1888,7 +2214,7 @@ human_numcompare (char const *a, char const *b)
hideously fast. */
static int
@@ -2905,7 +2935,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
{
while (blanks[to_uchar (*a)])
a++;
-@@ -1898,6 +2207,25 @@ numcompare (char const *a, char const *b
+@@ -1898,6 +2224,25 @@ numcompare (char const *a, char const *b)
return strnumcmp (a, b, decimal_point, thousands_sep);
}
@@ -2931,7 +2961,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
static int
general_numcompare (char const *sa, char const *sb)
{
-@@ -1930,7 +2258,7 @@ general_numcompare (char const *sa, char
+@@ -1930,7 +2275,7 @@ general_numcompare (char const *sa, char const *sb)
Return 0 if the name in S is not recognized. */
static int
@@ -2940,7 +2970,14 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
{
size_t lo = 0;
size_t hi = MONTHS_PER_YEAR;
-@@ -2210,7 +2538,7 @@ debug_key (struct line const *line, stru
+@@ -2204,13 +2549,12 @@ debug_key (struct line const *line, struct keyfield const *key)
+ {
+ char saved = *lim; *lim = '\0';
+
+- while (blanks[to_uchar (*beg)])
+- beg++;
++ skipblanks (&beg, lim);
+
char *tighter_lim = beg;
if (key->month)
@@ -2949,7 +2986,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
else if (key->general_numeric)
ignore_value (strtold (beg, &tighter_lim));
else if (key->numeric || key->human_numeric)
-@@ -2354,7 +2682,7 @@ key_warnings (struct keyfield const *gke
+@@ -2354,7 +2698,7 @@ key_warnings (struct keyfield const *gkey, bool gkey_only)
bool maybe_space_aligned = !hard_LC_COLLATE && default_key_compare (key)
&& !(key->schar || key->echar);
bool line_offset = key->eword == 0 && key->echar != 0; /* -k1.x,1.y */
@@ -2958,7 +2995,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
&& ((!key->skipsblanks && !(implicit_skip || maybe_space_aligned))
|| (!key->skipsblanks && key->schar)
|| (!key->skipeblanks && key->echar)))
-@@ -2412,11 +2740,83 @@ key_warnings (struct keyfield const *gke
+@@ -2412,11 +2756,83 @@ key_warnings (struct keyfield const *gkey, bool gkey_only)
error (0, 0, _("option `-r' only applies to last-resort comparison"));
}
@@ -3043,7 +3080,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
{
struct keyfield *key = keylist;
-@@ -2501,7 +2898,7 @@ keycompare (struct line const *a, struct
+@@ -2501,7 +2917,7 @@ keycompare (struct line const *a, struct line const *b)
else if (key->human_numeric)
diff = human_numcompare (ta, tb);
else if (key->month)
@@ -3052,7 +3089,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
else if (key->random)
diff = compare_random (ta, tlena, tb, tlenb);
else if (key->version)
-@@ -2617,6 +3014,179 @@ keycompare (struct line const *a, struct
+@@ -2617,6 +3033,179 @@ keycompare (struct line const *a, struct line const *b)
return key->reverse ? -diff : diff;
}
@@ -3232,7 +3269,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
/* Compare two lines A and B, returning negative, zero, or positive
depending on whether A compares less than, equal to, or greater than B. */
-@@ -4006,7 +4576,7 @@ main (int argc, char **argv)
+@@ -4006,7 +4595,7 @@ main (int argc, char **argv)
initialize_exit_failure (SORT_FAILURE);
hard_LC_COLLATE = hard_locale (LC_COLLATE);
@@ -3241,7 +3278,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
hard_LC_TIME = hard_locale (LC_TIME);
#endif
-@@ -4027,6 +4597,27 @@ main (int argc, char **argv)
+@@ -4027,6 +4616,29 @@ main (int argc, char **argv)
thousands_sep = -1;
}
@@ -3251,6 +3288,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
+ inittables = inittables_mb;
+ begfield = begfield_mb;
+ limfield = limfield_mb;
++ skipblanks = skipblanks_mb;
+ getmonth = getmonth_mb;
+ keycompare = keycompare_mb;
+ numcompare = numcompare_mb;
@@ -3261,6 +3299,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
+ inittables = inittables_uni;
+ begfield = begfield_uni;
+ limfield = limfield_uni;
++ skipblanks = skipblanks_uni;
+ getmonth = getmonth_uni;
+ keycompare = keycompare_uni;
+ numcompare = numcompare_uni;
@@ -3269,7 +3308,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
have_read_stdin = false;
inittables ();
-@@ -4297,13 +4888,35 @@ main (int argc, char **argv)
+@@ -4297,13 +4909,34 @@ main (int argc, char **argv)
case 't':
{
@@ -3286,7 +3325,6 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
+ {
+ wchar_t wc;
+ mbstate_t state;
-+ size_t i;
+
+ memset (&state, '\0', sizeof (mbstate_t));
+ newtab_length = mbrtowc (&wc, newtab, strnlen (newtab,
@@ -3309,7 +3347,7 @@ diff -urNp coreutils-8.6-orig/src/sort.c coreutils-8.6/src/sort.c
else
{
/* Provoke with `sort -txx'. Complain about
-@@ -4314,9 +4927,12 @@ main (int argc, char **argv)
+@@ -4314,9 +4947,12 @@ main (int argc, char **argv)
quote (optarg));
}
}
@@ -4086,39 +4124,3 @@ diff -urNp coreutils-8.6-orig/tests/misc/sort-mb-tests coreutils-8.6/tests/misc/
+fi
+test $errors = 0 || errors=1
+exit $errors
-diff -urNp coreutils-8.6-orig/tests/misc/sort-debug-keys coreutils-8.6/tests/misc/sort-debug-keys
---- coreutils-8.6-orig/tests/misc/sort-debug-keys 2010-10-11 19:35:11.000000000 +0200
-+++ coreutils-8.6/tests/misc/sort-debug-keys 2010-10-19 14:55:55.435692063 +0200
-@@ -305,18 +305,19 @@ _____
- ___________________
- EOF
-
--: ${LOCALE_FR_UTF8=none}
--if test "$LOCALE_FR_UTF8" != "none"; then
-- (
-- echo ' 1²---++3 1,234 Mi' |
-- LC_ALL=C sort --debug -k2g -k1b,1
-- echo ' 1²---++3 1,234 Mi' |
-- LC_ALL=$LOCALE_FR_UTF8 sort --debug -k2g -k1b,1
-- echo '+1234 1234Gi 1,234M' |
-- LC_ALL=$LOCALE_FR_UTF8 sort --debug -k1,1n -k1,1g \
-- -k1,1h -k2,2n -k2,2g -k2,2h -k3,3n -k3,3g -k3,3h
-- ) > out
-- compare out exp || fail=1
--fi
-+#temporarily disable sort debug-keys test for mbyte locales (doesn't work atm.)
-+#: ${LOCALE_FR_UTF8=none}
-+#if test "$LOCALE_FR_UTF8" != "none"; then
-+# (
-+# echo ' 1²---++3 1,234 Mi' |
-+# LC_ALL=C sort --debug -k2g -k1b,1
-+# echo ' 1²---++3 1,234 Mi' |
-+# LC_ALL=$LOCALE_FR_UTF8 sort --debug -k2g -k1b,1
-+# echo '+1234 1234Gi 1,234M' |
-+# LC_ALL=$LOCALE_FR_UTF8 sort --debug -k1,1n -k1,1g \
-+# -k1,1h -k2,2n -k2,2g -k2,2h -k3,3n -k3,3g -k3,3h
-+# ) > out
-+# compare out exp || fail=1
-+#fi
-
- Exit $fail
diff --git a/coreutils.spec b/coreutils.spec
index 08532d3..baae402 100644
--- a/coreutils.spec
+++ b/coreutils.spec
@@ -1,7 +1,7 @@
Summary: A set of basic GNU tools commonly used in shell scripts
Name: coreutils
Version: 8.6
-Release: 1%{?dist}
+Release: 2%{?dist}
License: GPLv3+
Group: System Environment/Base
Url: http://www.gnu.org/software/coreutils/
@@ -336,6 +336,9 @@ fi
%{_libdir}/coreutils
%changelog
+* Tue Oct 26 2010 Kamil Dudka <kdudka at redhat.com> - 8.6-2
+- improve i18n support in sort (debug-keys test is now back)
+
* Wed Oct 20 2010 Ondrej Vasik <ovasik at redhat.com> - 8.6-1
- new upstream release 8.6
- remove applied patches, temporarily disable sort
More information about the scm-commits mailing list