>From 40c6cb64f3dd974c9ee48baf0900f01f62f65291 Mon Sep 17 00:00:00 2001
From: Pavel Raiskup <praiskup@redhat.com>
Date: Sat, 21 Dec 2013 01:27:01 +0100
Subject: [PATCH] pg_upgrade: make the locale comparison more tolerating

Locale strings specified like 'cs_CZ.utf8' and 'cs_CZ.UTF-8'
should be treated as equivalent.  Failing to treat them as
equivalent caused failures during major server upgrades (when the
server machine spells the encoding slightly differently than the
not-yet-updated data stack).  The workaround was to change the
system locale to match the previous locale string.

With this commit the comparison is done in two phases.  First the
encoding part of the locale string (if any) is compared, and then
the rest of the string.  Before the encoding part is compared, it
is decoded into a precisely defined code from 'enum pg_enc'.  This
should make the comparison more stable even for completely
different spellings of the encoding (e.g. 'latin2' and
'iso 8859-2').

References:
3356208.RhzgiJ6fXA@nb.usersys.redhat.com
20121002155857.GE30089@momjian.us
---
 contrib/pg_upgrade/check.c | 37 ++++++++++++++++++++++++++++++++++---
 1 file changed, 34 insertions(+), 3 deletions(-)

diff --git a/contrib/pg_upgrade/check.c b/contrib/pg_upgrade/check.c
new file mode 100644
index a706708..e3b7cda
*** a/contrib/pg_upgrade/check.c
--- b/contrib/pg_upgrade/check.c
***************
*** 9,14 ****
--- 9,15 ----
  
  #include "postgres_fe.h"
  
+ #include "mb/pg_wchar.h"
  #include "pg_upgrade.h"
  
  
*************** set_locale_and_encoding(ClusterInfo *clu
*** 393,398 ****
--- 394,429 ----
  	PQfinish(conn);
  }
  
+ /*
+  * equivalent_locale()
+  *
+  * Best effort locale comparison.  Locale names are compared
+  * case-insensitively; when both end in ".encoding" naming the same server
+  * encoding, only the parts before that final '.' have to match.
+  * Returns false if we are not 100% sure the locales are equivalent.
+  */
+ static bool
+ equivalent_locale(const char *loca, const char *locb)
+ {
+ 	int enca, encb;
+ 	int lena, lenb;
+ 
+ 	const char *chara = strrchr(loca, '.');
+ 	const char *charb = strrchr(locb, '.');
+ 	if (!chara || !charb)
+ 		/* at least one locale string has no encoding part */
+ 		return (pg_strcasecmp(loca, locb) == 0);
+ 
+ 	enca = pg_valid_server_encoding(chara + 1);
+ 	encb = pg_valid_server_encoding(charb + 1);
+ 	if (enca < 0 || encb < 0 || enca != encb)
+ 		return (pg_strcasecmp(loca, locb) == 0);
+ 
+ 	/* prefixes must be equally long, or "a.utf8" would match "a.b.UTF-8" */
+ 	lena = chara - loca;
+ 	lenb = charb - locb;
+ 	return (lena == lenb && pg_strncasecmp(loca, locb, lena) == 0);
+ }
  
  /*
   * check_locale_and_encoding()
*************** check_locale_and_encoding(ControlData *o
*** 409,421 ****
  	 * They also often use inconsistent hyphenation, which we cannot fix, e.g.
  	 * UTF-8 vs. UTF8, so at least we display the mismatching values.
  	 */
! 	if (pg_strcasecmp(oldctrl->lc_collate, newctrl->lc_collate) != 0)
  		pg_fatal("lc_collate cluster values do not match:  old \"%s\", new \"%s\"\n",
  			   oldctrl->lc_collate, newctrl->lc_collate);
! 	if (pg_strcasecmp(oldctrl->lc_ctype, newctrl->lc_ctype) != 0)
  		pg_fatal("lc_ctype cluster values do not match:  old \"%s\", new \"%s\"\n",
  			   oldctrl->lc_ctype, newctrl->lc_ctype);
! 	if (pg_strcasecmp(oldctrl->encoding, newctrl->encoding) != 0)
  		pg_fatal("encoding cluster values do not match:  old \"%s\", new \"%s\"\n",
  			   oldctrl->encoding, newctrl->encoding);
  }
--- 440,452 ----
  	 * They also often use inconsistent hyphenation, which we cannot fix, e.g.
  	 * UTF-8 vs. UTF8, so at least we display the mismatching values.
  	 */
! 	if (!equivalent_locale(oldctrl->lc_collate, newctrl->lc_collate))
  		pg_fatal("lc_collate cluster values do not match:  old \"%s\", new \"%s\"\n",
  			   oldctrl->lc_collate, newctrl->lc_collate);
! 	if (!equivalent_locale(oldctrl->lc_ctype, newctrl->lc_ctype))
  		pg_fatal("lc_ctype cluster values do not match:  old \"%s\", new \"%s\"\n",
  			   oldctrl->lc_ctype, newctrl->lc_ctype);
! 	if (!equivalent_locale(oldctrl->encoding, newctrl->encoding))
  		pg_fatal("encoding cluster values do not match:  old \"%s\", new \"%s\"\n",
  			   oldctrl->encoding, newctrl->encoding);
  }
-- 
1.8.4.2