[postgresql/f20] upgrade: be more benevolent in locale comparison
Pavel Raiskup
praiskup at fedoraproject.org
Mon Oct 20 09:50:32 UTC 2014
commit 3a6ac9a75d1b5cb6093200f81dc39aca80e07d99
Author: Pavel Raiskup <praiskup at redhat.com>
Date: Mon Oct 20 11:47:13 2014 +0200
upgrade: be more benevolent in locale comparison
In F20+, upgrades should succeed when only the spelling of the locale
name has changed, e.g. from en_US.utf8 to en_US.UTF-8.
Resolves: #1007802
Version: 9.3.5-2
postgresql-upgrade-locale-spelling-2.patch | 40 +++++++++
postgresql-upgrade-locale-spelling.patch | 129 ++++++++++++++++++++++++++++
postgresql.spec | 9 ++-
3 files changed, 177 insertions(+), 1 deletions(-)
---
diff --git a/postgresql-upgrade-locale-spelling-2.patch b/postgresql-upgrade-locale-spelling-2.patch
new file mode 100644
index 0000000..0e863b6
--- /dev/null
+++ b/postgresql-upgrade-locale-spelling-2.patch
@@ -0,0 +1,40 @@
+From cd3e0071b8c9e082f5fe903a019d4e474be98e57 Mon Sep 17 00:00:00 2001
+From: Tom Lane <tgl at sss.pgh.pa.us>
+Date: Fri, 31 Jan 2014 02:03:30 -0500
+Subject: [PATCH] Allow unrecognized encoding names in locales, as long as
+ they're the same.
+
+The buildfarm says commit 58274728fb8e087049df67c0eee903d9743fdeda doesn't
+work so well on Windows. This is because the encoding part of Windows
+locale names can be just a code page number, eg "1252", which we don't
+consider to be a valid encoding name. Add a check to accept encoding
+parts that are case-insensitively string equal; this at least ensures
+that the new code doesn't reject any cases that the old code allowed.
+---
+ contrib/pg_upgrade/check.c | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/contrib/pg_upgrade/check.c b/contrib/pg_upgrade/check.c
+index 58c2d16..faeaff5 100644
+--- a/contrib/pg_upgrade/check.c
++++ b/contrib/pg_upgrade/check.c
+@@ -447,8 +447,14 @@ equivalent_locale(const char *loca, const char *locb)
+ if (!chara || !charb)
+ return (pg_strcasecmp(loca, locb) == 0);
+
+- /* Compare the encoding parts. */
+- if (!equivalent_encoding(chara + 1, charb + 1))
++ /*
++ * Compare the encoding parts. Windows tends to use code page numbers for
++ * the encoding part, which equivalent_encoding() won't like, so accept if
++ * the strings are case-insensitive equal; otherwise use
++ * equivalent_encoding() to compare.
++ */
++ if (pg_strcasecmp(chara + 1, charb + 1) != 0 &&
++ !equivalent_encoding(chara + 1, charb + 1))
+ return false;
+
+ /*
+--
+1.9.3
+
diff --git a/postgresql-upgrade-locale-spelling.patch b/postgresql-upgrade-locale-spelling.patch
new file mode 100644
index 0000000..239240b
--- /dev/null
+++ b/postgresql-upgrade-locale-spelling.patch
@@ -0,0 +1,129 @@
+From 58274728fb8e087049df67c0eee903d9743fdeda Mon Sep 17 00:00:00 2001
+From: Tom Lane <tgl at sss.pgh.pa.us>
+Date: Thu, 30 Jan 2014 19:07:06 -0500
+Subject: [PATCH] Be forgiving of variant spellings of locale names in
+ pg_upgrade.
+
+Even though the server tries to canonicalize stored locale names, the
+platform often doesn't cooperate, so it's entirely possible that one DB
+thinks its locale is, say, "en_US.UTF-8" while the other has "en_US.utf8".
+Rather than failing, we should try to allow this where it's clearly OK.
+
+There is already pretty robust encoding lookup in encnames.c, so make
+use of that to compare the encoding parts of the names. The locale
+identifier parts are just compared case-insensitively, which we were
+already doing. The major problem known to exist in the field is variant
+encoding-name spellings, so hopefully this will be Good Enough. If not,
+we can try being even laxer.
+
+Pavel Raiskup, reviewed by Rushabh Lathia
+---
+ contrib/pg_upgrade/check.c | 66 +++++++++++++++++++++++++++++++++++++++++++---
+ 1 file changed, 63 insertions(+), 3 deletions(-)
+
+diff --git a/contrib/pg_upgrade/check.c b/contrib/pg_upgrade/check.c
+index 794d22c..58c2d16 100644
+--- a/contrib/pg_upgrade/check.c
++++ b/contrib/pg_upgrade/check.c
+@@ -9,6 +9,7 @@
+
+ #include "postgres_fe.h"
+
++#include "mb/pg_wchar.h"
+ #include "pg_upgrade.h"
+
+
+@@ -16,6 +17,8 @@ static void set_locale_and_encoding(ClusterInfo *cluster);
+ static void check_new_cluster_is_empty(void);
+ static void check_locale_and_encoding(ControlData *oldctrl,
+ ControlData *newctrl);
++static bool equivalent_locale(const char *loca, const char *locb);
++static bool equivalent_encoding(const char *chara, const char *charb);
+ static void check_is_super_user(ClusterInfo *cluster);
+ static void check_for_prepared_transactions(ClusterInfo *cluster);
+ static void check_for_isn_and_int8_passing_mismatch(ClusterInfo *cluster);
+@@ -412,21 +415,78 @@ check_locale_and_encoding(ControlData *oldctrl,
+ * They also often use inconsistent hyphenation, which we cannot fix, e.g.
+ * UTF-8 vs. UTF8, so at least we display the mismatching values.
+ */
+- if (pg_strcasecmp(oldctrl->lc_collate, newctrl->lc_collate) != 0)
++ if (!equivalent_locale(oldctrl->lc_collate, newctrl->lc_collate))
+ pg_log(PG_FATAL,
+ "lc_collate cluster values do not match: old \"%s\", new \"%s\"\n",
+ oldctrl->lc_collate, newctrl->lc_collate);
+- if (pg_strcasecmp(oldctrl->lc_ctype, newctrl->lc_ctype) != 0)
++ if (!equivalent_locale(oldctrl->lc_ctype, newctrl->lc_ctype))
+ pg_log(PG_FATAL,
+ "lc_ctype cluster values do not match: old \"%s\", new \"%s\"\n",
+ oldctrl->lc_ctype, newctrl->lc_ctype);
+- if (pg_strcasecmp(oldctrl->encoding, newctrl->encoding) != 0)
++ if (!equivalent_encoding(oldctrl->encoding, newctrl->encoding))
+ pg_log(PG_FATAL,
+ "encoding cluster values do not match: old \"%s\", new \"%s\"\n",
+ oldctrl->encoding, newctrl->encoding);
+ }
+
+
++/*
++ * equivalent_locale()
++ *
++ * Best effort locale-name comparison. Return false if we are not 100% sure
++ * the locales are equivalent.
++ */
++static bool
++equivalent_locale(const char *loca, const char *locb)
++{
++ const char *chara = strrchr(loca, '.');
++ const char *charb = strrchr(locb, '.');
++ int lencmp;
++
++ /* If they don't both contain an encoding part, just do strcasecmp(). */
++ if (!chara || !charb)
++ return (pg_strcasecmp(loca, locb) == 0);
++
++ /* Compare the encoding parts. */
++ if (!equivalent_encoding(chara + 1, charb + 1))
++ return false;
++
++ /*
++ * OK, compare the locale identifiers (e.g. en_US part of en_US.utf8).
++ *
++ * It's tempting to ignore non-alphanumeric chars here, but for now it's
++ * not clear that that's necessary; just do case-insensitive comparison.
++ */
++ lencmp = chara - loca;
++ if (lencmp != charb - locb)
++ return false;
++
++ return (pg_strncasecmp(loca, locb, lencmp) == 0);
++}
++
++/*
++ * equivalent_encoding()
++ *
++ * Best effort encoding-name comparison. Return true only if the encodings
++ * are valid server-side encodings and known equivalent.
++ *
++ * Because the lookup in pg_valid_server_encoding() does case folding and
++ * ignores non-alphanumeric characters, this will recognize many popular
++ * variant spellings as equivalent, eg "utf8" and "UTF-8" will match.
++ */
++static bool
++equivalent_encoding(const char *chara, const char *charb)
++{
++ int enca = pg_valid_server_encoding(chara);
++ int encb = pg_valid_server_encoding(charb);
++
++ if (enca < 0 || encb < 0)
++ return false;
++
++ return (enca == encb);
++}
++
++
+ static void
+ check_new_cluster_is_empty(void)
+ {
+--
+1.9.3
+
diff --git a/postgresql.spec b/postgresql.spec
index 427e157..c738f02 100644
--- a/postgresql.spec
+++ b/postgresql.spec
@@ -64,7 +64,7 @@ Summary: PostgreSQL client programs
Name: postgresql
%global majorversion 9.3
Version: 9.3.5
-Release: 1%{?dist}
+Release: 2%{?dist}
# The PostgreSQL license is very similar to other MIT licenses, but the OSI
# recognizes it as an independent license, so we do as well.
@@ -111,6 +111,8 @@ Patch3: postgresql-perl-rpath.patch
Patch4: postgresql-config-comment.patch
Patch5: postgresql-var-run-socket.patch
Patch6: postgresql-man.patch
+Patch7: postgresql-upgrade-locale-spelling.patch
+Patch8: postgresql-upgrade-locale-spelling-2.patch
BuildRequires: perl(ExtUtils::MakeMaker) glibc-devel bison flex gawk help2man
BuildRequires: perl(ExtUtils::Embed), perl-devel
@@ -339,6 +341,8 @@ benchmarks.
%patch4 -p1
%patch5 -p1
%patch6 -p1
+%patch7 -p1
+%patch8 -p1
# We used to run autoconf here, but there's no longer any real need to,
# since Postgres ships with a reasonably modern configure script.
@@ -1135,6 +1139,9 @@ fi
%endif
%changelog
+* Mon Oct 20 2014 Pavel Raiskup <praiskup at redhat.com> - 9.3.5-2
+- be forgiving of variant spellings of locale names in pg_upgrade (#1007802)
+
* Tue Jul 22 2014 Pavel Raiskup <praiskup at redhat.com> - 9.3.5-1
- update to 9.3.5 per release notes
http://www.postgresql.org/docs/9.3/static/release-9-3-5.html
More information about the scm-commits mailing list