From 2b6e8998d016f5aa06d9d043fb538892162e966d Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Tue, 14 Mar 2023 22:28:21 -0700 Subject: [PATCH v5 3/5] Handle the "und" locale in ICU versions 54 and older. The "und" locale is an alternative spelling of the root locale, but it was not recognized until ICU 55. To maintain common behavior across all supported ICU versions, check for "und" and replace with "root" before opening. Previously, the lack of support for "und" was dangerous, because versions 54 and older fall back to the environment when a locale is not found. If the user specified "und" for the language (which is expected and documented), it could not only resolve to the wrong collator, but it could unexpectedly change (which could lead to corrupt indexes). This effectively reverts commit d72900bded, which worked around the problem for the built-in "unicode" collation, and is no longer necessary. Discussion: https://postgr.es/m/60da0cecfb512a78b8666b31631a636215d8ce73.camel@j-davis.com Discussion: https://postgr.es/m/0c6fa66f2753217d2a40480a96bd2ccf023536a1.camel@j-davis.com --- src/backend/utils/adt/pg_locale.c | 30 +++++++++++++++++++ src/bin/initdb/initdb.c | 2 +- src/include/catalog/catversion.h | 2 +- .../regress/expected/collate.icu.utf8.out | 7 +++++ src/test/regress/sql/collate.icu.utf8.sql | 2 ++ 5 files changed, 41 insertions(+), 2 deletions(-) diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 28d1715ece..a8cc8ffcb1 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -1422,12 +1422,35 @@ lc_ctype_is_c(Oid collation) struct pg_locale_struct default_locale; +#ifdef USE_ICU + static UCollator * pg_ucol_open(const char *locale_str) { UCollator *collator; UErrorCode status; + /* + * In ICU versions 54 and earlier, "und" is not a recognized spelling of + * the root locale. If the first component of the locale is "und", replace + * with "root" before opening. 
+ */ +#if U_ICU_VERSION_MAJOR_NUM < 55 + char *fixed_str = NULL; + + if (strncasecmp(locale_str, "und", strlen("und")) == 0 && + !isalnum(locale_str[strlen("und")])) + { + const char *remainder = locale_str + strlen("und"); + + fixed_str = palloc(strlen("root") + strlen(remainder) + 1); + strcpy(fixed_str, "root"); + strcat(fixed_str, remainder); + + locale_str = fixed_str; + } +#endif + status = U_ZERO_ERROR; collator = ucol_open(locale_str, &status); if (U_FAILURE(status)) @@ -1439,9 +1462,16 @@ pg_ucol_open(const char *locale_str) icu_set_collation_attributes(collator, locale_str); #endif +#if U_ICU_VERSION_MAJOR_NUM < 55 + if (fixed_str != NULL) + pfree(fixed_str); +#endif + return collator; } +#endif + void make_icu_collator(const char *iculocstr, const char *icurules, diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 68d430ed63..d48b7b6060 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -1498,7 +1498,7 @@ setup_collation(FILE *cmdfd) * that they win if libc defines a locale with the same name. 
*/ PG_CMD_PRINTF("INSERT INTO pg_collation (oid, collname, collnamespace, collowner, collprovider, collisdeterministic, collencoding, colliculocale)" - "VALUES (pg_nextoid('pg_catalog.pg_collation', 'oid', 'pg_catalog.pg_collation_oid_index'), 'unicode', 'pg_catalog'::regnamespace, %u, '%c', true, -1, '');\n\n", + "VALUES (pg_nextoid('pg_catalog.pg_collation', 'oid', 'pg_catalog.pg_collation_oid_index'), 'unicode', 'pg_catalog'::regnamespace, %u, '%c', true, -1, 'und');\n\n", BOOTSTRAP_SUPERUSERID, COLLPROVIDER_ICU); PG_CMD_PRINTF("INSERT INTO pg_collation (oid, collname, collnamespace, collowner, collprovider, collisdeterministic, collencoding, collcollate, collctype)" diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 309aed3703..b2eed22d46 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -57,6 +57,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202303141 +#define CATALOG_VERSION_NO 202303151 #endif diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out index 6225b575ce..f135200c99 100644 --- a/src/test/regress/expected/collate.icu.utf8.out +++ b/src/test/regress/expected/collate.icu.utf8.out @@ -1312,6 +1312,13 @@ SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive; t (1 row) +CREATE COLLATION lt_upperfirst (provider = icu, locale = 'und-u-kf-upper'); +SELECT 'Z' COLLATE lt_upperfirst < 'z' COLLATE lt_upperfirst; + ?column? 
+---------- + t +(1 row) + CREATE TABLE test1cs (x text COLLATE case_sensitive); CREATE TABLE test2cs (x text COLLATE case_sensitive); CREATE TABLE test3cs (x text COLLATE case_sensitive); diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql index 64cbfd0a5b..8105ebc8ae 100644 --- a/src/test/regress/sql/collate.icu.utf8.sql +++ b/src/test/regress/sql/collate.icu.utf8.sql @@ -521,6 +521,8 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse -- test language tags CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false); SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive; +CREATE COLLATION lt_upperfirst (provider = icu, locale = 'und-u-kf-upper'); +SELECT 'Z' COLLATE lt_upperfirst < 'z' COLLATE lt_upperfirst; CREATE TABLE test1cs (x text COLLATE case_sensitive); CREATE TABLE test2cs (x text COLLATE case_sensitive); -- 2.34.1