>From 40c6cb64f3dd974c9ee48baf0900f01f62f65291 Mon Sep 17 00:00:00 2001 From: Pavel Raiskup Date: Sat, 21 Dec 2013 01:27:01 +0100 Subject: [PATCH] pg_upgrade: make the locale comparison more tolerant Locale strings such as 'cs_CZ.utf8' and 'cs_CZ.UTF-8' should be treated as equivalent. Because they were not, a major server upgrade could fail (when the server machine spells the encoding slightly differently than the not-yet-upgraded data stack does). The workaround was to change the system locale to match the previous locale string. With this commit the comparison is done in two phases: first the encoding part of the locale string (if any) is compared, and then the rest of the string. Before the encoding part is compared, it is decoded into a precisely defined code from 'enum pg_enc'. This should make the comparison more stable even for completely different spellings of the encoding (e.g. 'latin2' and 'iso 8859-2'). References: 3356208.RhzgiJ6fXA@nb.usersys.redhat.com 20121002155857.GE30089@momjian.us --- contrib/pg_upgrade/check.c | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/contrib/pg_upgrade/check.c b/contrib/pg_upgrade/check.c new file mode 100644 index a706708..e3b7cda *** a/contrib/pg_upgrade/check.c --- b/contrib/pg_upgrade/check.c *************** *** 9,14 **** --- 9,15 ---- #include "postgres_fe.h" + #include "mb/pg_wchar.h" #include "pg_upgrade.h" *************** set_locale_and_encoding(ClusterInfo *clu *** 393,398 **** --- 394,429 ---- PQfinish(conn); } + /* + * equivalent_locale() + * + * Best effort locale comparison. Return false if we are not 100% sure the locale is equivalent. 
+  */
+ static bool
+ equivalent_locale(const char *loca, const char *locb)
+ {
+ 	int			enca, encb;
+ 	int			lencmp;
+ 
+ 	const char *chara = strrchr(loca, '.');
+ 	const char *charb = strrchr(locb, '.');
+ 	if (!chara || !charb)
+ 		/* at least one locale string has no encoding part */
+ 		return (pg_strcasecmp(loca, locb) == 0);
+ 
+ 	chara++;					/* step past the dot to the encoding name */
+ 	charb++;
+ 
+ 	enca = pg_valid_server_encoding(chara);
+ 	encb = pg_valid_server_encoding(charb);
+ 
+ 	if (enca < 0 || encb < 0 || enca != encb)
+ 		return (pg_strcasecmp(loca, locb) == 0);
+ 
+ 	lencmp = chara - loca;		/* prefix length, dot included; locb's must be equal */
+ 	return (lencmp == charb - locb && pg_strncasecmp(loca, locb, lencmp) == 0);
+ }
  
  /*
   * check_locale_and_encoding()
***************
check_locale_and_encoding(ControlData *o
*** 409,421 ****
  	 * They also often use inconsistent hyphenation, which we cannot fix, e.g.
  	 * UTF-8 vs. UTF8, so at least we display the mismatching values.
  	 */
! 	if (pg_strcasecmp(oldctrl->lc_collate, newctrl->lc_collate) != 0)
  		pg_fatal("lc_collate cluster values do not match: old \"%s\", new \"%s\"\n",
  				 oldctrl->lc_collate, newctrl->lc_collate);
! 	if (pg_strcasecmp(oldctrl->lc_ctype, newctrl->lc_ctype) != 0)
  		pg_fatal("lc_ctype cluster values do not match: old \"%s\", new \"%s\"\n",
  				 oldctrl->lc_ctype, newctrl->lc_ctype);
! 	if (pg_strcasecmp(oldctrl->encoding, newctrl->encoding) != 0)
  		pg_fatal("encoding cluster values do not match: old \"%s\", new \"%s\"\n",
  				 oldctrl->encoding, newctrl->encoding);
  }
--- 440,452 ----
  	 * They also often use inconsistent hyphenation, which we cannot fix, e.g.
  	 * UTF-8 vs. UTF8, so at least we display the mismatching values.
  	 */
! 	if (!equivalent_locale(oldctrl->lc_collate, newctrl->lc_collate))
  		pg_fatal("lc_collate cluster values do not match: old \"%s\", new \"%s\"\n",
  				 oldctrl->lc_collate, newctrl->lc_collate);
! 	if (!equivalent_locale(oldctrl->lc_ctype, newctrl->lc_ctype))
  		pg_fatal("lc_ctype cluster values do not match: old \"%s\", new \"%s\"\n",
  				 oldctrl->lc_ctype, newctrl->lc_ctype);
! 	if (!equivalent_locale(oldctrl->encoding, newctrl->encoding))
  		pg_fatal("encoding cluster values do not match: old \"%s\", new \"%s\"\n",
  				 oldctrl->encoding, newctrl->encoding);
  }
-- 
1.8.4.2