[postgresql] upgrade: be more benevolent in locale comparison

Mon Oct 20 10:05:41 UTC 2014

commit 7fcd42641db5670f1d9622ec6fb6a121e2d984f2
Author: Pavel Raiskup <praiskup at redhat.com>
Date:   Mon Oct 20 11:47:13 2014 +0200

    upgrade: be more benevolent in locale comparison
    
    In F20+, upgrades should succeed when the locale name differs only in
    the spelling of its encoding part, e.g. en_US.utf8 vs. en_US.UTF-8.
    
    Resolves: #1007802
    Version: 9.3.5-7

 postgresql-upgrade-locale-spelling-2.patch |   40 +++++++++
 postgresql-upgrade-locale-spelling.patch   |  129 ++++++++++++++++++++++++++++
 postgresql.spec                            |    9 ++-
 3 files changed, 177 insertions(+), 1 deletions(-)
---

diff --git a/postgresql-upgrade-locale-spelling-2.patch b/postgresql-upgrade-locale-spelling-2.patch
new file mode 100644
index 0000000..0e863b6
--- /dev/null
+++ b/postgresql-upgrade-locale-spelling-2.patch
@@ -0,0 +1,40 @@
+From cd3e0071b8c9e082f5fe903a019d4e474be98e57 Mon Sep 17 00:00:00 2001
+From: Tom Lane <tgl at sss.pgh.pa.us>
+Date: Fri, 31 Jan 2014 02:03:30 -0500
+Subject: [PATCH] Allow unrecognized encoding names in locales, as long as
+ they're the same.
+
+The buildfarm says commit 58274728fb8e087049df67c0eee903d9743fdeda doesn't
+work so well on Windows.  This is because the encoding part of Windows
+locale names can be just a code page number, eg "1252", which we don't
+consider to be a valid encoding name.  Add a check to accept encoding
+parts that are case-insensitively string equal; this at least ensures
+that the new code doesn't reject any cases that the old code allowed.
+---
+ contrib/pg_upgrade/check.c | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/contrib/pg_upgrade/check.c b/contrib/pg_upgrade/check.c
+index 58c2d16..faeaff5 100644
+--- a/contrib/pg_upgrade/check.c
++++ b/contrib/pg_upgrade/check.c
+@@ -447,8 +447,14 @@ equivalent_locale(const char *loca, const char *locb)
+ 	if (!chara || !charb)
+ 		return (pg_strcasecmp(loca, locb) == 0);
+ 
+-	/* Compare the encoding parts. */
+-	if (!equivalent_encoding(chara + 1, charb + 1))
++	/*
++	 * Compare the encoding parts.	Windows tends to use code page numbers for
++	 * the encoding part, which equivalent_encoding() won't like, so accept if
++	 * the strings are case-insensitive equal; otherwise use
++	 * equivalent_encoding() to compare.
++	 */
++	if (pg_strcasecmp(chara + 1, charb + 1) != 0 &&
++		!equivalent_encoding(chara + 1, charb + 1))
+ 		return false;
+ 
+ 	/*
+-- 
+1.9.3
+
diff --git a/postgresql-upgrade-locale-spelling.patch b/postgresql-upgrade-locale-spelling.patch
new file mode 100644
index 0000000..239240b
--- /dev/null
+++ b/postgresql-upgrade-locale-spelling.patch
@@ -0,0 +1,129 @@
+From 58274728fb8e087049df67c0eee903d9743fdeda Mon Sep 17 00:00:00 2001
+From: Tom Lane <tgl at sss.pgh.pa.us>
+Date: Thu, 30 Jan 2014 19:07:06 -0500
+Subject: [PATCH] Be forgiving of variant spellings of locale names in
+ pg_upgrade.
+
+Even though the server tries to canonicalize stored locale names, the
+platform often doesn't cooperate, so it's entirely possible that one DB
+thinks its locale is, say, "en_US.UTF-8" while the other has "en_US.utf8".
+Rather than failing, we should try to allow this where it's clearly OK.
+
+There is already pretty robust encoding lookup in encnames.c, so make
+use of that to compare the encoding parts of the names.  The locale
+identifier parts are just compared case-insensitively, which we were
+already doing.  The major problem known to exist in the field is variant
+encoding-name spellings, so hopefully this will be Good Enough.  If not,
+we can try being even laxer.
+
+Pavel Raiskup, reviewed by Rushabh Lathia
+---
+ contrib/pg_upgrade/check.c | 66 +++++++++++++++++++++++++++++++++++++++++++---
+ 1 file changed, 63 insertions(+), 3 deletions(-)
+
+diff --git a/contrib/pg_upgrade/check.c b/contrib/pg_upgrade/check.c
+index 794d22c..58c2d16 100644
+--- a/contrib/pg_upgrade/check.c
++++ b/contrib/pg_upgrade/check.c
+@@ -9,6 +9,7 @@
+ 
+ #include "postgres_fe.h"
+ 
++#include "mb/pg_wchar.h"
+ #include "pg_upgrade.h"
+ 
+ 
+@@ -16,6 +17,8 @@ static void set_locale_and_encoding(ClusterInfo *cluster);
+ static void check_new_cluster_is_empty(void);
+ static void check_locale_and_encoding(ControlData *oldctrl,
+ 						  ControlData *newctrl);
++static bool equivalent_locale(const char *loca, const char *locb);
++static bool equivalent_encoding(const char *chara, const char *charb);
+ static void check_is_super_user(ClusterInfo *cluster);
+ static void check_for_prepared_transactions(ClusterInfo *cluster);
+ static void check_for_isn_and_int8_passing_mismatch(ClusterInfo *cluster);
+@@ -412,21 +415,78 @@ check_locale_and_encoding(ControlData *oldctrl,
+ 	 * They also often use inconsistent hyphenation, which we cannot fix, e.g.
+ 	 * UTF-8 vs. UTF8, so at least we display the mismatching values.
+ 	 */
+-	if (pg_strcasecmp(oldctrl->lc_collate, newctrl->lc_collate) != 0)
++	if (!equivalent_locale(oldctrl->lc_collate, newctrl->lc_collate))
+ 		pg_log(PG_FATAL,
+ 		 "lc_collate cluster values do not match:  old \"%s\", new \"%s\"\n",
+ 			   oldctrl->lc_collate, newctrl->lc_collate);
+-	if (pg_strcasecmp(oldctrl->lc_ctype, newctrl->lc_ctype) != 0)
++	if (!equivalent_locale(oldctrl->lc_ctype, newctrl->lc_ctype))
+ 		pg_log(PG_FATAL,
+ 		   "lc_ctype cluster values do not match:  old \"%s\", new \"%s\"\n",
+ 			   oldctrl->lc_ctype, newctrl->lc_ctype);
+-	if (pg_strcasecmp(oldctrl->encoding, newctrl->encoding) != 0)
++	if (!equivalent_encoding(oldctrl->encoding, newctrl->encoding))
+ 		pg_log(PG_FATAL,
+ 		   "encoding cluster values do not match:  old \"%s\", new \"%s\"\n",
+ 			   oldctrl->encoding, newctrl->encoding);
+ }
+ 
+ 
++/*
++ * equivalent_locale()
++ *
++ * Best effort locale-name comparison.  Return false if we are not 100% sure
++ * the locales are equivalent.
++ */
++static bool
++equivalent_locale(const char *loca, const char *locb)
++{
++	const char *chara = strrchr(loca, '.');
++	const char *charb = strrchr(locb, '.');
++	int			lencmp;
++
++	/* If they don't both contain an encoding part, just do strcasecmp(). */
++	if (!chara || !charb)
++		return (pg_strcasecmp(loca, locb) == 0);
++
++	/* Compare the encoding parts. */
++	if (!equivalent_encoding(chara + 1, charb + 1))
++		return false;
++
++	/*
++	 * OK, compare the locale identifiers (e.g. en_US part of en_US.utf8).
++	 *
++	 * It's tempting to ignore non-alphanumeric chars here, but for now it's
++	 * not clear that that's necessary; just do case-insensitive comparison.
++	 */
++	lencmp = chara - loca;
++	if (lencmp != charb - locb)
++		return false;
++
++	return (pg_strncasecmp(loca, locb, lencmp) == 0);
++}
++
++/*
++ * equivalent_encoding()
++ *
++ * Best effort encoding-name comparison.  Return true only if the encodings
++ * are valid server-side encodings and known equivalent.
++ *
++ * Because the lookup in pg_valid_server_encoding() does case folding and
++ * ignores non-alphanumeric characters, this will recognize many popular
++ * variant spellings as equivalent, eg "utf8" and "UTF-8" will match.
++ */
++static bool
++equivalent_encoding(const char *chara, const char *charb)
++{
++	int			enca = pg_valid_server_encoding(chara);
++	int			encb = pg_valid_server_encoding(charb);
++
++	if (enca < 0 || encb < 0)
++		return false;
++
++	return (enca == encb);
++}
++
++
+ static void
+ check_new_cluster_is_empty(void)
+ {
+-- 
+1.9.3
+
diff --git a/postgresql.spec b/postgresql.spec
index e0634e8..aa33ac9 100644
--- a/postgresql.spec
+++ b/postgresql.spec
@@ -67,7 +67,7 @@ Summary: PostgreSQL client programs
 Name: postgresql
 %global majorversion 9.3
 Version: 9.3.5
-Release: 6%{?dist}
+Release: 7%{?dist}
 
 # The PostgreSQL license is very similar to other MIT licenses, but the OSI
 # recognizes it as an independent license, so we do as well.
@@ -119,6 +119,8 @@ Patch3: postgresql-perl-rpath.patch
 Patch4: postgresql-config-comment.patch
 Patch5: postgresql-var-run-socket.patch
 Patch6: postgresql-man.patch
+Patch7: postgresql-upgrade-locale-spelling.patch
+Patch8: postgresql-upgrade-locale-spelling-2.patch
 
 BuildRequires: perl(ExtUtils::MakeMaker) glibc-devel bison flex gawk help2man
 BuildRequires: perl(ExtUtils::Embed), perl-devel
@@ -355,6 +357,8 @@ benchmarks.
 %patch4 -p1
 %patch5 -p1
 %patch6 -p1
+%patch7 -p1
+%patch8 -p1
 
 # We used to run autoconf here, but there's no longer any real need to,
 # since Postgres ships with a reasonably modern configure script.
@@ -1151,6 +1155,9 @@ fi
 %endif
 
 %changelog
+* Mon Oct 20 2014 Pavel Raiskup <praiskup at redhat.com> - 9.3.5-7
+- be forgiving of variant spellings of locale names in pg_upgrade (#1007802)
+
 * Sun Sep 21 2014 Pavel Raiskup <praiskup at redhat.com> - 9.3.5-6
 - postgresql-setup & relatives are now in separate tarball