From 0145d9050ba3feef163c1d4a13660f904167458a Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Mon, 5 Dec 2022 10:43:52 -0800 Subject: [PATCH v2 3/6] Refactor pg_locale_t routines. * add pg_locale_internal.h to hide pg_locale_struct * move info.lt into info.libc.lt to match icu * introduce init_default_locale() * introduce default_locale_collation_version() * introduce pg_locale_deterministic() accessor * make default_locale a static global in pg_locale.c * refactor pg_newlocale_from_collation() to use multiple static functions and avoid allocating in TopMemoryContext until necessary * refactor check_icu_locale() to use pg_newlocale() --- src/backend/access/hash/hashfunc.c | 82 ++--- src/backend/commands/collationcmds.c | 1 + src/backend/regex/regc_pg_locale.c | 45 +-- src/backend/utils/adt/formatting.c | 25 +- src/backend/utils/adt/like.c | 3 +- src/backend/utils/adt/like_support.c | 3 +- src/backend/utils/adt/pg_locale.c | 459 +++++++++++++++++-------- src/backend/utils/adt/varchar.c | 62 ++-- src/backend/utils/adt/varlena.c | 8 +- src/backend/utils/init/postinit.c | 30 +- src/include/utils/pg_locale.h | 55 +-- src/include/utils/pg_locale_internal.h | 68 ++++ 12 files changed, 508 insertions(+), 333 deletions(-) create mode 100644 src/include/utils/pg_locale_internal.h diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c index b8136e496f..6d9f014c5b 100644 --- a/src/backend/access/hash/hashfunc.c +++ b/src/backend/access/hash/hashfunc.c @@ -281,36 +281,28 @@ hashtext(PG_FUNCTION_ARGS) if (!lc_collate_is_c(collid)) mylocale = pg_newlocale_from_collation(collid); - if (!mylocale || mylocale->deterministic) + if (pg_locale_deterministic(mylocale)) { result = hash_any((unsigned char *) VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key)); } else { -#ifdef USE_ICU - if (mylocale->provider == COLLPROVIDER_ICU) - { - Size bsize, rsize; - char *buf; - const char *keydata = VARDATA_ANY(key); - size_t keylen = VARSIZE_ANY_EXHDR(key); - - 
bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale); - buf = palloc(bsize); - - rsize = pg_strnxfrm(buf, bsize, keydata, keylen, mylocale); - if (rsize != bsize) - elog(ERROR, "pg_strnxfrm() returned unexpected result"); - - result = hash_any((uint8_t *) buf, bsize); - - pfree(buf); - } - else -#endif - /* shouldn't happen */ - elog(ERROR, "unsupported collprovider: %c", mylocale->provider); + Size bsize, rsize; + char *buf; + const char *keydata = VARDATA_ANY(key); + size_t keylen = VARSIZE_ANY_EXHDR(key); + + bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale); + buf = palloc(bsize); + + rsize = pg_strnxfrm(buf, bsize, keydata, keylen, mylocale); + if (rsize != bsize) + elog(ERROR, "pg_strnxfrm() returned unexpected result"); + + result = hash_any((uint8_t *) buf, bsize); + + pfree(buf); } /* Avoid leaking memory for toasted inputs */ @@ -336,7 +328,7 @@ hashtextextended(PG_FUNCTION_ARGS) if (!lc_collate_is_c(collid)) mylocale = pg_newlocale_from_collation(collid); - if (!mylocale || mylocale->deterministic) + if (pg_locale_deterministic(mylocale)) { result = hash_any_extended((unsigned char *) VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key), @@ -344,30 +336,22 @@ hashtextextended(PG_FUNCTION_ARGS) } else { -#ifdef USE_ICU - if (mylocale->provider == COLLPROVIDER_ICU) - { - Size bsize, rsize; - char *buf; - const char *keydata = VARDATA_ANY(key); - size_t keylen = VARSIZE_ANY_EXHDR(key); - - bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale); - buf = palloc(bsize); - - rsize = pg_strnxfrm(buf, bsize, keydata, keylen, mylocale); - if (rsize != bsize) - elog(ERROR, "pg_strnxfrm() returned unexpected result"); - - result = hash_any_extended((uint8_t *) buf, bsize, - PG_GETARG_INT64(1)); - - pfree(buf); - } - else -#endif - /* shouldn't happen */ - elog(ERROR, "unsupported collprovider: %c", mylocale->provider); + Size bsize, rsize; + char *buf; + const char *keydata = VARDATA_ANY(key); + size_t keylen = VARSIZE_ANY_EXHDR(key); + + bsize = pg_strnxfrm(NULL, 0, 
keydata, keylen, mylocale); + buf = palloc(bsize); + + rsize = pg_strnxfrm(buf, bsize, keydata, keylen, mylocale); + if (rsize != bsize) + elog(ERROR, "pg_strnxfrm() returned unexpected result"); + + result = hash_any_extended((uint8_t *) buf, bsize, + PG_GETARG_INT64(1)); + + pfree(buf); } PG_FREE_IF_COPY(key, 0); diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c index 81e54e0ce6..9e84da4891 100644 --- a/src/backend/commands/collationcmds.c +++ b/src/backend/commands/collationcmds.c @@ -36,6 +36,7 @@ #include "utils/builtins.h" #include "utils/lsyscache.h" #include "utils/pg_locale.h" +#include "utils/pg_locale_internal.h" #include "utils/rel.h" #include "utils/syscache.h" diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c index 02d462a659..ac05efb558 100644 --- a/src/backend/regex/regc_pg_locale.c +++ b/src/backend/regex/regc_pg_locale.c @@ -17,6 +17,7 @@ #include "catalog/pg_collation.h" #include "utils/pg_locale.h" +#include "utils/pg_locale_internal.h" /* * To provide as much functionality as possible on a variety of platforms, @@ -306,13 +307,13 @@ pg_wc_isdigit(pg_wchar c) case PG_REGEX_LOCALE_WIDE_L: #ifdef HAVE_LOCALE_T if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswdigit_l((wint_t) c, pg_regex_locale->info.lt); + return iswdigit_l((wint_t) c, pg_regex_locale->info.libc.lt); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: #ifdef HAVE_LOCALE_T return (c <= (pg_wchar) UCHAR_MAX && - isdigit_l((unsigned char) c, pg_regex_locale->info.lt)); + isdigit_l((unsigned char) c, pg_regex_locale->info.libc.lt)); #endif break; case PG_REGEX_LOCALE_ICU: @@ -342,13 +343,13 @@ pg_wc_isalpha(pg_wchar c) case PG_REGEX_LOCALE_WIDE_L: #ifdef HAVE_LOCALE_T if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswalpha_l((wint_t) c, pg_regex_locale->info.lt); + return iswalpha_l((wint_t) c, pg_regex_locale->info.libc.lt); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: 
#ifdef HAVE_LOCALE_T return (c <= (pg_wchar) UCHAR_MAX && - isalpha_l((unsigned char) c, pg_regex_locale->info.lt)); + isalpha_l((unsigned char) c, pg_regex_locale->info.libc.lt)); #endif break; case PG_REGEX_LOCALE_ICU: @@ -378,13 +379,13 @@ pg_wc_isalnum(pg_wchar c) case PG_REGEX_LOCALE_WIDE_L: #ifdef HAVE_LOCALE_T if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswalnum_l((wint_t) c, pg_regex_locale->info.lt); + return iswalnum_l((wint_t) c, pg_regex_locale->info.libc.lt); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: #ifdef HAVE_LOCALE_T return (c <= (pg_wchar) UCHAR_MAX && - isalnum_l((unsigned char) c, pg_regex_locale->info.lt)); + isalnum_l((unsigned char) c, pg_regex_locale->info.libc.lt)); #endif break; case PG_REGEX_LOCALE_ICU: @@ -423,13 +424,13 @@ pg_wc_isupper(pg_wchar c) case PG_REGEX_LOCALE_WIDE_L: #ifdef HAVE_LOCALE_T if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswupper_l((wint_t) c, pg_regex_locale->info.lt); + return iswupper_l((wint_t) c, pg_regex_locale->info.libc.lt); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: #ifdef HAVE_LOCALE_T return (c <= (pg_wchar) UCHAR_MAX && - isupper_l((unsigned char) c, pg_regex_locale->info.lt)); + isupper_l((unsigned char) c, pg_regex_locale->info.libc.lt)); #endif break; case PG_REGEX_LOCALE_ICU: @@ -459,13 +460,13 @@ pg_wc_islower(pg_wchar c) case PG_REGEX_LOCALE_WIDE_L: #ifdef HAVE_LOCALE_T if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswlower_l((wint_t) c, pg_regex_locale->info.lt); + return iswlower_l((wint_t) c, pg_regex_locale->info.libc.lt); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: #ifdef HAVE_LOCALE_T return (c <= (pg_wchar) UCHAR_MAX && - islower_l((unsigned char) c, pg_regex_locale->info.lt)); + islower_l((unsigned char) c, pg_regex_locale->info.libc.lt)); #endif break; case PG_REGEX_LOCALE_ICU: @@ -495,13 +496,13 @@ pg_wc_isgraph(pg_wchar c) case PG_REGEX_LOCALE_WIDE_L: #ifdef HAVE_LOCALE_T if (sizeof(wchar_t) >= 4 || c <= 
(pg_wchar) 0xFFFF) - return iswgraph_l((wint_t) c, pg_regex_locale->info.lt); + return iswgraph_l((wint_t) c, pg_regex_locale->info.libc.lt); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: #ifdef HAVE_LOCALE_T return (c <= (pg_wchar) UCHAR_MAX && - isgraph_l((unsigned char) c, pg_regex_locale->info.lt)); + isgraph_l((unsigned char) c, pg_regex_locale->info.libc.lt)); #endif break; case PG_REGEX_LOCALE_ICU: @@ -531,13 +532,13 @@ pg_wc_isprint(pg_wchar c) case PG_REGEX_LOCALE_WIDE_L: #ifdef HAVE_LOCALE_T if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswprint_l((wint_t) c, pg_regex_locale->info.lt); + return iswprint_l((wint_t) c, pg_regex_locale->info.libc.lt); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: #ifdef HAVE_LOCALE_T return (c <= (pg_wchar) UCHAR_MAX && - isprint_l((unsigned char) c, pg_regex_locale->info.lt)); + isprint_l((unsigned char) c, pg_regex_locale->info.libc.lt)); #endif break; case PG_REGEX_LOCALE_ICU: @@ -567,13 +568,13 @@ pg_wc_ispunct(pg_wchar c) case PG_REGEX_LOCALE_WIDE_L: #ifdef HAVE_LOCALE_T if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswpunct_l((wint_t) c, pg_regex_locale->info.lt); + return iswpunct_l((wint_t) c, pg_regex_locale->info.libc.lt); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: #ifdef HAVE_LOCALE_T return (c <= (pg_wchar) UCHAR_MAX && - ispunct_l((unsigned char) c, pg_regex_locale->info.lt)); + ispunct_l((unsigned char) c, pg_regex_locale->info.libc.lt)); #endif break; case PG_REGEX_LOCALE_ICU: @@ -603,13 +604,13 @@ pg_wc_isspace(pg_wchar c) case PG_REGEX_LOCALE_WIDE_L: #ifdef HAVE_LOCALE_T if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswspace_l((wint_t) c, pg_regex_locale->info.lt); + return iswspace_l((wint_t) c, pg_regex_locale->info.libc.lt); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: #ifdef HAVE_LOCALE_T return (c <= (pg_wchar) UCHAR_MAX && - isspace_l((unsigned char) c, pg_regex_locale->info.lt)); + isspace_l((unsigned char) c, 
pg_regex_locale->info.libc.lt)); #endif break; case PG_REGEX_LOCALE_ICU: @@ -647,13 +648,13 @@ pg_wc_toupper(pg_wchar c) case PG_REGEX_LOCALE_WIDE_L: #ifdef HAVE_LOCALE_T if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return towupper_l((wint_t) c, pg_regex_locale->info.lt); + return towupper_l((wint_t) c, pg_regex_locale->info.libc.lt); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: #ifdef HAVE_LOCALE_T if (c <= (pg_wchar) UCHAR_MAX) - return toupper_l((unsigned char) c, pg_regex_locale->info.lt); + return toupper_l((unsigned char) c, pg_regex_locale->info.libc.lt); #endif return c; case PG_REGEX_LOCALE_ICU: @@ -691,13 +692,13 @@ pg_wc_tolower(pg_wchar c) case PG_REGEX_LOCALE_WIDE_L: #ifdef HAVE_LOCALE_T if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return towlower_l((wint_t) c, pg_regex_locale->info.lt); + return towlower_l((wint_t) c, pg_regex_locale->info.libc.lt); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: #ifdef HAVE_LOCALE_T if (c <= (pg_wchar) UCHAR_MAX) - return tolower_l((unsigned char) c, pg_regex_locale->info.lt); + return tolower_l((unsigned char) c, pg_regex_locale->info.libc.lt); #endif return c; case PG_REGEX_LOCALE_ICU: diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index 26f498b5df..a4bc7fa5f5 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -87,6 +87,7 @@ #include "utils/memutils.h" #include "utils/numeric.h" #include "utils/pg_locale.h" +#include "utils/pg_locale_internal.h" /* ---------- * Convenience macros for error handling @@ -1611,7 +1612,7 @@ icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale, *buff_dest = palloc(len_dest * sizeof(**buff_dest)); status = U_ZERO_ERROR; len_dest = func(*buff_dest, len_dest, buff_source, len_source, - mylocale->info.icu.locale, &status); + mylocale->ctype, &status); if (status == U_BUFFER_OVERFLOW_ERROR) { /* try again with adjusted length */ @@ -1619,7 +1620,7 @@ 
icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale, *buff_dest = palloc(len_dest * sizeof(**buff_dest)); status = U_ZERO_ERROR; len_dest = func(*buff_dest, len_dest, buff_source, len_source, - mylocale->info.icu.locale, &status); + mylocale->ctype, &status); } if (U_FAILURE(status)) ereport(ERROR, @@ -1732,7 +1733,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) { #ifdef HAVE_LOCALE_T if (mylocale) - workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt); + workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.libc.lt); else #endif workspace[curr_char] = towlower(workspace[curr_char]); @@ -1765,7 +1766,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) { #ifdef HAVE_LOCALE_T if (mylocale) - *p = tolower_l((unsigned char) *p, mylocale->info.lt); + *p = tolower_l((unsigned char) *p, mylocale->info.libc.lt); else #endif *p = pg_tolower((unsigned char) *p); @@ -1854,7 +1855,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) { #ifdef HAVE_LOCALE_T if (mylocale) - workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt); + workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.libc.lt); else #endif workspace[curr_char] = towupper(workspace[curr_char]); @@ -1887,7 +1888,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) { #ifdef HAVE_LOCALE_T if (mylocale) - *p = toupper_l((unsigned char) *p, mylocale->info.lt); + *p = toupper_l((unsigned char) *p, mylocale->info.libc.lt); else #endif *p = pg_toupper((unsigned char) *p); @@ -1979,10 +1980,10 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) if (mylocale) { if (wasalnum) - workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt); + workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.libc.lt); else - workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt); - wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt); + 
workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.libc.lt); + wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.libc.lt); } else #endif @@ -2024,10 +2025,10 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) if (mylocale) { if (wasalnum) - *p = tolower_l((unsigned char) *p, mylocale->info.lt); + *p = tolower_l((unsigned char) *p, mylocale->info.libc.lt); else - *p = toupper_l((unsigned char) *p, mylocale->info.lt); - wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt); + *p = toupper_l((unsigned char) *p, mylocale->info.libc.lt); + wasalnum = isalnum_l((unsigned char) *p, mylocale->info.libc.lt); } else #endif diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c index 8e671b9fab..98714a0492 100644 --- a/src/backend/utils/adt/like.c +++ b/src/backend/utils/adt/like.c @@ -24,6 +24,7 @@ #include "miscadmin.h" #include "utils/builtins.h" #include "utils/pg_locale.h" +#include "utils/pg_locale_internal.h" #define LIKE_TRUE 1 @@ -96,7 +97,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c) return pg_ascii_tolower(c); #ifdef HAVE_LOCALE_T else if (locale) - return tolower_l(c, locale->info.lt); + return tolower_l(c, locale->info.libc.lt); #endif else return pg_tolower(c); diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c index 2d3aaaaf6b..28d23ac3ab 100644 --- a/src/backend/utils/adt/like_support.c +++ b/src/backend/utils/adt/like_support.c @@ -52,6 +52,7 @@ #include "utils/datum.h" #include "utils/lsyscache.h" #include "utils/pg_locale.h" +#include "utils/pg_locale_internal.h" #include "utils/selfuncs.h" #include "utils/varlena.h" @@ -1511,7 +1512,7 @@ pattern_char_isalpha(char c, bool is_multibyte, (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); #ifdef HAVE_LOCALE_T else if (locale && locale->provider == COLLPROVIDER_LIBC) - return isalpha_l((unsigned char) c, locale->info.lt); + return isalpha_l((unsigned char) c, locale->info.libc.lt); 
#endif else return isalpha((unsigned char) c); diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 133bb03a13..0a19845df4 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -65,6 +65,7 @@ #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/pg_locale.h" +#include "utils/pg_locale_internal.h" #include "utils/syscache.h" #ifdef USE_ICU @@ -128,6 +129,11 @@ static HTAB *collation_cache = NULL; static char *IsoLocaleName(const char *); #endif +/* + * Database default locale. + */ +static pg_locale_t default_locale = NULL; + #ifdef USE_ICU /* * Converter object for converting between ICU's UChar strings and C strings @@ -1333,7 +1339,7 @@ lc_collate_is_c(Oid collation) static int result = -1; char *localeptr; - if (default_locale.provider == COLLPROVIDER_ICU) + if (default_locale->provider == COLLPROVIDER_ICU) return false; if (result >= 0) @@ -1386,7 +1392,7 @@ lc_ctype_is_c(Oid collation) static int result = -1; char *localeptr; - if (default_locale.provider == COLLPROVIDER_ICU) + if (default_locale->provider == COLLPROVIDER_ICU) return false; if (result >= 0) @@ -1417,38 +1423,6 @@ lc_ctype_is_c(Oid collation) return (lookup_collation_cache(collation, true))->ctype_is_c; } -struct pg_locale_struct default_locale; - -void -make_icu_collator(const char *iculocstr, - struct pg_locale_struct *resultp) -{ -#ifdef USE_ICU - UCollator *collator; - UErrorCode status; - - status = U_ZERO_ERROR; - collator = ucol_open(iculocstr, &status); - if (U_FAILURE(status)) - ereport(ERROR, - (errmsg("could not open collator for locale \"%s\": %s", - iculocstr, u_errorName(status)))); - - if (U_ICU_VERSION_MAJOR_NUM < 54) - icu_set_collation_attributes(collator, iculocstr); - - /* We will leak this string if the caller errors later :-( */ - resultp->info.icu.locale = MemoryContextStrdup(TopMemoryContext, iculocstr); - resultp->info.icu.ucol = collator; -#else /* not USE_ICU */ - /* could get 
here if a collation was created by a build with ICU */ - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("ICU is not supported in this build"))); -#endif /* not USE_ICU */ -} - - /* simple subroutine for reporting errors from newlocale() */ #ifdef HAVE_LOCALE_T static void @@ -1482,6 +1456,261 @@ report_newlocale_failure(const char *localename) #endif /* HAVE_LOCALE_T */ +/* + * Construct a new pg_locale_t object. + * + * Passing NULL for the version is allowed; and even if it is specified, the + * result may or may not have an exactly matching version. Other parameters + * are required. Caller should pass isdefault=true if initializing + * default_locale; false otherwise. + * + * Structures are allocated in CurrentMemoryContext. The libc locale_t or + * UCollator is not allocated in any memory context, so the caller should be + * sure to call pg_freelocale() to close it. + */ +static pg_locale_t +pg_newlocale(char provider, bool isdefault, bool deterministic, + const char *collate, const char *ctype, const char *version) +{ + pg_locale_t result = palloc0(sizeof(struct pg_locale_struct)); + + /* + * If COLLPROVIDER_DEFAULT, caller should use default_locale or NULL + * instead. + */ + Assert(provider != COLLPROVIDER_DEFAULT); + + if (provider == COLLPROVIDER_LIBC && isdefault) + { + /* + * When the default locale is libc, the actual locale settings are + * controlled by setlocale(), so there's nothing to do here. 
+ */ + } + else if (provider == COLLPROVIDER_LIBC) + { +#ifdef HAVE_LOCALE_T + locale_t loc; + + /* newlocale's result may be leaked if we encounter an error */ + + if (strcmp(collate, ctype) == 0) + { + /* Normal case where they're the same */ + errno = 0; +#ifndef WIN32 + loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collate, + NULL); +#else + loc = _create_locale(LC_ALL, collate); +#endif + if (!loc) + report_newlocale_failure(collate); + } + else + { +#ifndef WIN32 + /* We need two newlocale() steps */ + locale_t loc1; + + errno = 0; + loc1 = newlocale(LC_COLLATE_MASK, collate, NULL); + if (!loc1) + report_newlocale_failure(collate); + errno = 0; + loc = newlocale(LC_CTYPE_MASK, ctype, loc1); + if (!loc) + report_newlocale_failure(ctype); +#else + + /* + * XXX The _create_locale() API doesn't appear to support + * this. Could perhaps be worked around by changing + * pg_locale_t to contain two separate fields. + */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("collations with different collate and ctype values are not supported on this platform"))); +#endif + } + + result->info.libc.lt = loc; +#else /* not HAVE_LOCALE_T */ + /* platform that doesn't support locale_t */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("collation provider LIBC is not supported on this platform"))); +#endif /* not HAVE_LOCALE_T */ + } +#ifdef USE_ICU + else if (provider == COLLPROVIDER_ICU) + { + UCollator *collator; + UErrorCode status; + + /* collator may be leaked if we encounter an error */ + + status = U_ZERO_ERROR; + collator = ucol_open(collate, &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("could not open collator for locale \"%s\": %s", + collate, u_errorName(status)))); + + if (U_ICU_VERSION_MAJOR_NUM < 54) + icu_set_collation_attributes(collator, collate); + + result->info.icu.ucol = collator; + } +#endif + else + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", provider); + + 
result->provider = provider; + result->deterministic = deterministic; + result->collate = pstrdup(collate); + result->ctype = pstrdup(ctype); + + return result; +} + +/* + * Move resources from the given locale into the given mcxt, consuming and + * freeing the given locale and returning the new one. + */ +static pg_locale_t +pg_movelocale(MemoryContext mcxt, pg_locale_t *plocale) +{ + pg_locale_t locale = *plocale; + pg_locale_t result; + + Assert(locale != default_locale); + + result = MemoryContextAllocZero(mcxt, sizeof(struct pg_locale_struct)); + + result->provider = locale->provider; + result->deterministic = locale->deterministic; + + if (locale->provider == COLLPROVIDER_LIBC) + { +#ifdef HAVE_LOCALE_T + if (locale->info.libc.lt != NULL) + { + /* not in a memory context; just reassign the pointer */ + result->info.libc.lt = locale->info.libc.lt; + } +#endif + } +#ifdef USE_ICU + else if (locale->provider == COLLPROVIDER_ICU) + { + /* not in a memory context; just reassign the pointer */ + result->info.icu.ucol = locale->info.icu.ucol; + } +#endif + else + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", locale->provider); + + result->collate = MemoryContextStrdup(mcxt, locale->collate); + pfree(locale->collate); + + result->ctype = MemoryContextStrdup(mcxt, locale->ctype); + pfree(locale->ctype); + + pfree(locale); + *plocale = NULL; + + return result; +} + +/* + * Free pg_locale_t and close libc locale_t or UCollator. 
+ */ +#ifdef USE_ICU +static void +pg_freelocale(pg_locale_t locale) +{ + if (!locale) + return; + + Assert(locale != default_locale); + + pfree(locale->collate); + pfree(locale->ctype); + + if (locale->provider == COLLPROVIDER_LIBC) + { +#ifdef HAVE_LOCALE_T + if (locale->info.libc.lt != NULL) + { +#ifndef WIN32 + freelocale(locale->info.libc.lt); +#else + _free_locale(locale->info.libc.lt); +#endif + } +#endif + } +#ifdef USE_ICU + else if (locale->provider == COLLPROVIDER_ICU) + { + ucol_close(locale->info.icu.ucol); + } +#endif + else + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", locale->provider); + + pfree(locale); +} +#endif + +/* + * Accessor so that callers don't need to include pg_locale_internal.h. + */ +bool +pg_locale_deterministic(pg_locale_t locale) +{ + if (locale == NULL) + return true; + else + return locale->deterministic; +} + +/* + * Initialize default database locale. + */ +void +init_default_locale(char provider, const char *collate, const char *ctype, + const char *version) +{ + pg_locale_t temp_locale; + bool deterministic; + + /* + * Default locale is currently always deterministic. Nondeterministic + * locales currently don't support pattern matching, which would break a + * lot of things if applied globally. + */ + deterministic = true; + temp_locale = pg_newlocale(provider, true, deterministic, collate, + ctype, version); + + default_locale = pg_movelocale(TopMemoryContext, &temp_locale); +} + +/* + * Return palloc'd version string for the default locale. + */ +char * +default_locale_collation_version() +{ + return get_collation_actual_version(default_locale->provider, + default_locale->collate); +} + /* * Create a locale_t from a collation OID. Results are cached for the * lifetime of the backend. Thus, do not free the result with freelocale(). 
@@ -1506,8 +1735,8 @@ pg_newlocale_from_collation(Oid collid) if (collid == DEFAULT_COLLATION_OID) { - if (default_locale.provider == COLLPROVIDER_ICU) - return &default_locale; + if (default_locale->provider == COLLPROVIDER_ICU) + return default_locale; else return (pg_locale_t) 0; } @@ -1519,107 +1748,65 @@ pg_newlocale_from_collation(Oid collid) /* We haven't computed this yet in this session, so do it */ HeapTuple tp; Form_pg_collation collform; - struct pg_locale_struct result; - pg_locale_t resultp; + pg_locale_t temp_locale; + pg_locale_t perm_locale; Datum datum; bool isnull; + char *collate; + char *ctype; + char *collversionstr; tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid)); if (!HeapTupleIsValid(tp)) elog(ERROR, "cache lookup failed for collation %u", collid); collform = (Form_pg_collation) GETSTRUCT(tp); - /* We'll fill in the result struct locally before allocating memory */ - memset(&result, 0, sizeof(result)); - result.provider = collform->collprovider; - result.deterministic = collform->collisdeterministic; + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion, + &isnull); + if (!isnull) + collversionstr = TextDatumGetCString(datum); + else + collversionstr = NULL; if (collform->collprovider == COLLPROVIDER_LIBC) { -#ifdef HAVE_LOCALE_T - const char *collcollate; - const char *collctype pg_attribute_unused(); - locale_t loc; - - datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collcollate, &isnull); + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collcollate, + &isnull); Assert(!isnull); - collcollate = TextDatumGetCString(datum); - datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collctype, &isnull); + collate = TextDatumGetCString(datum); + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collctype, + &isnull); Assert(!isnull); - collctype = TextDatumGetCString(datum); - - if (strcmp(collcollate, collctype) == 0) - { - /* Normal case where they're the same */ - errno = 0; -#ifndef WIN32 - loc = 
newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate, - NULL); -#else - loc = _create_locale(LC_ALL, collcollate); -#endif - if (!loc) - report_newlocale_failure(collcollate); - } - else - { -#ifndef WIN32 - /* We need two newlocale() steps */ - locale_t loc1; - - errno = 0; - loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL); - if (!loc1) - report_newlocale_failure(collcollate); - errno = 0; - loc = newlocale(LC_CTYPE_MASK, collctype, loc1); - if (!loc) - report_newlocale_failure(collctype); -#else - - /* - * XXX The _create_locale() API doesn't appear to support - * this. Could perhaps be worked around by changing - * pg_locale_t to contain two separate fields. - */ - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("collations with different collate and ctype values are not supported on this platform"))); -#endif - } - - result.info.lt = loc; -#else /* not HAVE_LOCALE_T */ - /* platform that doesn't support locale_t */ - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("collation provider LIBC is not supported on this platform"))); -#endif /* not HAVE_LOCALE_T */ + ctype = TextDatumGetCString(datum); } +#ifdef USE_ICU else if (collform->collprovider == COLLPROVIDER_ICU) { - const char *iculocstr; - - datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_colliculocale, &isnull); + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_colliculocale, + &isnull); Assert(!isnull); - iculocstr = TextDatumGetCString(datum); - make_icu_collator(iculocstr, &result); + collate = TextDatumGetCString(datum); + + /* for ICU, collate and ctype are both set from iculocale */ + ctype = collate; } +#endif + else + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", collform->collprovider); - datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion, - &isnull); - if (!isnull) + temp_locale = pg_newlocale(collform->collprovider, false, + collform->collisdeterministic, + collate, ctype, collversionstr); + + 
ReleaseSysCache(tp); + + if (collversionstr != NULL) { char *actual_versionstr; - char *collversionstr; - - collversionstr = TextDatumGetCString(datum); - datum = SysCacheGetAttr(COLLOID, tp, collform->collprovider == COLLPROVIDER_ICU ? Anum_pg_collation_colliculocale : Anum_pg_collation_collcollate, &isnull); - Assert(!isnull); + actual_versionstr = get_collation_actual_version(collform->collprovider, collate); - actual_versionstr = get_collation_actual_version(collform->collprovider, - TextDatumGetCString(datum)); if (!actual_versionstr) { /* @@ -1646,13 +1833,10 @@ pg_newlocale_from_collation(Oid collid) NameStr(collform->collname))))); } - ReleaseSysCache(tp); - - /* We'll keep the pg_locale_t structures in TopMemoryContext */ - resultp = MemoryContextAlloc(TopMemoryContext, sizeof(*resultp)); - *resultp = result; + /* move into TopMemoryContext */ + perm_locale = pg_movelocale(TopMemoryContext, &temp_locale); - cache_entry->locale = resultp; + cache_entry->locale = perm_locale; } return cache_entry->locale; @@ -1812,7 +1996,7 @@ pg_strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2, errno = 0; #ifdef HAVE_LOCALE_T if (locale) - result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt); + result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.libc.lt); else #endif result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p); @@ -1855,7 +2039,7 @@ pg_strcoll_libc(const char *arg1, const char *arg2, pg_locale_t locale) if (locale) { #ifdef HAVE_LOCALE_T - result = strcoll_l(arg1, arg2, locale->info.lt); + result = strcoll_l(arg1, arg2, locale->info.libc.lt); #else /* shouldn't happen */ elog(ERROR, "unsupported collprovider: %c", locale->provider); @@ -2108,7 +2292,7 @@ pg_strxfrm_libc(char *dest, const char *src, size_t destsize, #ifdef TRUST_STXFRM #ifdef HAVE_LOCALE_T if (locale) - return strxfrm_l(dest, src, destsize, locale->info.lt); + return strxfrm_l(dest, src, destsize, locale->info.libc.lt); else #endif return strxfrm(dest, src, 
destsize); @@ -2718,19 +2902,16 @@ void check_icu_locale(const char *icu_locale) { #ifdef USE_ICU - UCollator *collator; - UErrorCode status; + pg_locale_t locale; - status = U_ZERO_ERROR; - collator = ucol_open(icu_locale, &status); - if (U_FAILURE(status)) - ereport(ERROR, - (errmsg("could not open collator for locale \"%s\": %s", - icu_locale, u_errorName(status)))); - - if (U_ICU_VERSION_MAJOR_NUM < 54) - icu_set_collation_attributes(collator, icu_locale); - ucol_close(collator); + /* + * Whether it's deterministic doesn't matter in this case, because it + * doesn't affect whether the locale is valid or not; and we're going to + * discard the locale anyway. + */ + locale = pg_newlocale(COLLPROVIDER_ICU, false, true, icu_locale, + icu_locale, NULL); + pg_freelocale(locale); #else ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), @@ -2793,10 +2974,10 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale) #ifdef HAVE_LOCALE_T #ifdef HAVE_WCSTOMBS_L /* Use wcstombs_l for nondefault locales */ - result = wcstombs_l(to, from, tolen, locale->info.lt); + result = wcstombs_l(to, from, tolen, locale->info.libc.lt); #else /* !HAVE_WCSTOMBS_L */ /* We have to temporarily set the locale as current ... ugh */ - locale_t save_locale = uselocale(locale->info.lt); + locale_t save_locale = uselocale(locale->info.libc.lt); result = wcstombs(to, from, tolen); @@ -2870,10 +3051,10 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, #ifdef HAVE_LOCALE_T #ifdef HAVE_MBSTOWCS_L /* Use mbstowcs_l for nondefault locales */ - result = mbstowcs_l(to, str, tolen, locale->info.lt); + result = mbstowcs_l(to, str, tolen, locale->info.libc.lt); #else /* !HAVE_MBSTOWCS_L */ /* We have to temporarily set the locale as current ... 
ugh */ - locale_t save_locale = uselocale(locale->info.lt); + locale_t save_locale = uselocale(locale->info.libc.lt); result = mbstowcs(to, str, tolen); diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c index d0bc528e9f..52f27d483d 100644 --- a/src/backend/utils/adt/varchar.c +++ b/src/backend/utils/adt/varchar.c @@ -757,7 +757,7 @@ bpchareq(PG_FUNCTION_ARGS) else mylocale = pg_newlocale_from_collation(collid); - if (locale_is_c || !mylocale || mylocale->deterministic) + if (locale_is_c || pg_locale_deterministic(mylocale)) { /* * Since we only care about equality or not-equality, we can avoid all @@ -802,7 +802,7 @@ bpcharne(PG_FUNCTION_ARGS) else mylocale = pg_newlocale_from_collation(collid); - if (locale_is_c || !mylocale || mylocale->deterministic) + if (locale_is_c || pg_locale_deterministic(mylocale)) { /* * Since we only care about equality or not-equality, we can avoid all @@ -1010,33 +1010,25 @@ hashbpchar(PG_FUNCTION_ARGS) if (!lc_collate_is_c(collid)) mylocale = pg_newlocale_from_collation(collid); - if (!mylocale || mylocale->deterministic) + if (pg_locale_deterministic(mylocale)) { result = hash_any((unsigned char *) keydata, keylen); } else { -#ifdef USE_ICU - if (mylocale->provider == COLLPROVIDER_ICU) - { - Size bsize, rsize; - char *buf; + Size bsize, rsize; + char *buf; - bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale); - buf = palloc(bsize); + bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale); + buf = palloc(bsize); - rsize = pg_strnxfrm(buf, bsize, keydata, keylen, mylocale); - if (rsize != bsize) - elog(ERROR, "pg_strnxfrm() returned unexpected result"); + rsize = pg_strnxfrm(buf, bsize, keydata, keylen, mylocale); + if (rsize != bsize) + elog(ERROR, "pg_strnxfrm() returned unexpected result"); - result = hash_any((uint8_t *) buf, bsize); + result = hash_any((uint8_t *) buf, bsize); - pfree(buf); - } - else -#endif - /* shouldn't happen */ - elog(ERROR, "unsupported collprovider: %c", 
mylocale->provider); + pfree(buf); } /* Avoid leaking memory for toasted inputs */ @@ -1067,35 +1059,27 @@ hashbpcharextended(PG_FUNCTION_ARGS) if (!lc_collate_is_c(collid)) mylocale = pg_newlocale_from_collation(collid); - if (!mylocale || mylocale->deterministic) + if (pg_locale_deterministic(mylocale)) { result = hash_any_extended((unsigned char *) keydata, keylen, PG_GETARG_INT64(1)); } else { -#ifdef USE_ICU - if (mylocale->provider == COLLPROVIDER_ICU) - { - Size bsize, rsize; - char *buf; + Size bsize, rsize; + char *buf; - bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale); - buf = palloc(bsize); + bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale); + buf = palloc(bsize); - rsize = pg_strnxfrm(buf, bsize, keydata, keylen, mylocale); - if (rsize != bsize) - elog(ERROR, "pg_strnxfrm() returned unexpected result"); + rsize = pg_strnxfrm(buf, bsize, keydata, keylen, mylocale); + if (rsize != bsize) + elog(ERROR, "pg_strnxfrm() returned unexpected result"); - result = hash_any_extended((uint8_t *) buf, bsize, - PG_GETARG_INT64(1)); + result = hash_any_extended((uint8_t *) buf, bsize, + PG_GETARG_INT64(1)); - pfree(buf); - } - else -#endif - /* shouldn't happen */ - elog(ERROR, "unsupported collprovider: %c", mylocale->provider); + pfree(buf); } PG_FREE_IF_COPY(key, 0); diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 2dfba4b488..a7c39d7afa 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -1203,7 +1203,7 @@ text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state) if (!lc_collate_is_c(collid)) mylocale = pg_newlocale_from_collation(collid); - if (mylocale && !mylocale->deterministic) + if (!pg_locale_deterministic(mylocale)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("nondeterministic collations are not supported for substring searches"))); @@ -1601,7 +1601,7 @@ texteq(PG_FUNCTION_ARGS) else mylocale = pg_newlocale_from_collation(collid); - 
if (locale_is_c || !mylocale || mylocale->deterministic) + if (locale_is_c || pg_locale_deterministic(mylocale)) { Datum arg1 = PG_GETARG_DATUM(0); Datum arg2 = PG_GETARG_DATUM(1); @@ -1660,7 +1660,7 @@ textne(PG_FUNCTION_ARGS) else mylocale = pg_newlocale_from_collation(collid); - if (locale_is_c || !mylocale || mylocale->deterministic) + if (locale_is_c || pg_locale_deterministic(mylocale)) { Datum arg1 = PG_GETARG_DATUM(0); Datum arg2 = PG_GETARG_DATUM(1); @@ -1774,7 +1774,7 @@ text_starts_with(PG_FUNCTION_ARGS) if (!lc_collate_is_c(collid)) mylocale = pg_newlocale_from_collation(collid); - if (mylocale && !mylocale->deterministic) + if (!pg_locale_deterministic(mylocale)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("nondeterministic collations are not supported for substring searches"))); diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index a990c833c5..c5528cbf64 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -317,6 +317,7 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect char *collate; char *ctype; char *iculocale; + char *collversionstr; /* Fetch our pg_database row normally, via syscache */ tup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId)); @@ -424,35 +425,32 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_daticulocale, &isnull); Assert(!isnull); iculocale = TextDatumGetCString(datum); - make_icu_collator(iculocale, &default_locale); } else iculocale = NULL; - default_locale.provider = dbform->datlocprovider; + datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_datcollversion, + &isnull); + if (!isnull) + collversionstr = TextDatumGetCString(datum); + else + collversionstr = NULL; - /* - * Default locale is currently always deterministic. 
Nondeterministic - * locales currently don't support pattern matching, which would break a - * lot of things if applied globally. - */ - default_locale.deterministic = true; + init_default_locale(dbform->datlocprovider, + dbform->datlocprovider == COLLPROVIDER_ICU ? iculocale : collate, + dbform->datlocprovider == COLLPROVIDER_ICU ? iculocale : ctype, + collversionstr); /* * Check collation version. See similar code in * pg_newlocale_from_collation(). Note that here we warn instead of error * in any case, so that we don't prevent connecting. */ - datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_datcollversion, - &isnull); - if (!isnull) + if (collversionstr != NULL) { char *actual_versionstr; - char *collversionstr; - collversionstr = TextDatumGetCString(datum); - - actual_versionstr = get_collation_actual_version(dbform->datlocprovider, dbform->datlocprovider == COLLPROVIDER_ICU ? iculocale : collate); + actual_versionstr = default_locale_collation_version(); if (!actual_versionstr) /* should not happen */ elog(WARNING, @@ -470,6 +468,8 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect "or build PostgreSQL with the right library version.", quote_identifier(name)))); } + else + collversionstr = NULL; /* Make the locale settings visible as GUC variables, too */ SetConfigOption("lc_collate", collate, PGC_INTERNAL, PGC_S_DYNAMIC_DEFAULT); diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index ceab0d4307..0d7bc0534f 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -15,22 +15,6 @@ #if defined(LOCALE_T_IN_XLOCALE) || defined(WCSTOMBS_L_IN_XLOCALE) #include <xlocale.h> #endif -#ifdef USE_ICU -#include <unicode/ucol.h> -#endif - -#ifdef USE_ICU -/* - * ucol_strcollUTF8() was introduced in ICU 50, but it is buggy before ICU 53.
- * (see - * <https://www.postgresql.org/message-id/flat/f1438ec6-22aa-4029-9a3b-26f79d330e72%40manitou-mail.org>) - */ -#if U_ICU_VERSION_MAJOR_NUM >= 53 -#define HAVE_UCOL_STRCOLLUTF8 1 -#else -#undef HAVE_UCOL_STRCOLLUTF8 -#endif -#endif /* use for libc locale names */ #define LOCALE_NAME_BUFLEN 128 @@ -64,39 +48,12 @@ extern struct lconv *PGLC_localeconv(void); extern void cache_locale_time(void); -/* - * We define our own wrapper around locale_t so we can keep the same - * function signatures for all builds, while not having to create a - * fake version of the standard type locale_t in the global namespace. - * pg_locale_t is occasionally checked for truth, so make it a pointer. - */ -struct pg_locale_struct -{ - char provider; - bool deterministic; - union - { -#ifdef HAVE_LOCALE_T - locale_t lt; -#endif -#ifdef USE_ICU - struct - { - const char *locale; - UCollator *ucol; - } icu; -#endif - int dummy; /* in case we have neither LOCALE_T nor ICU */ - } info; -}; - typedef struct pg_locale_struct *pg_locale_t; -extern PGDLLIMPORT struct pg_locale_struct default_locale; - -extern void make_icu_collator(const char *iculocstr, - struct pg_locale_struct *resultp); - +extern void init_default_locale(char provider, const char *collate, + const char *ctype, const char *version); +extern char *default_locale_collation_version(void); +extern bool pg_locale_deterministic(pg_locale_t locale); extern pg_locale_t pg_newlocale_from_collation(Oid collid); extern char *get_collation_actual_version(char collprovider, const char *collcollate); @@ -114,10 +71,6 @@ extern size_t pg_strxfrm_prefix(char *dest, const char *src, size_t destsize, extern size_t pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src, size_t srclen, pg_locale_t locale); -#ifdef USE_ICU -extern int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes); -extern int32_t icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar); -#endif extern void check_icu_locale(const char *icu_locale); /* These functions convert from/to libc's wchar_t, *not* pg_wchar_t */ diff
--git a/src/include/utils/pg_locale_internal.h b/src/include/utils/pg_locale_internal.h new file mode 100644 index 0000000000..33465ad92d --- /dev/null +++ b/src/include/utils/pg_locale_internal.h @@ -0,0 +1,68 @@ +/*----------------------------------------------------------------------- + * + * PostgreSQL locale utilities + * + * src/include/utils/pg_locale_internal.h + * + * Copyright (c) 2002-2022, PostgreSQL Global Development Group + * + *----------------------------------------------------------------------- + */ + + +#ifndef _PG_LOCALE_INTERNAL_ +#define _PG_LOCALE_INTERNAL_ + +#ifdef USE_ICU +#include <unicode/ucol.h> +#endif + +#ifdef USE_ICU +/* + * ucol_strcollUTF8() was introduced in ICU 50, but it is buggy before ICU 53. + * (see + * <https://www.postgresql.org/message-id/flat/f1438ec6-22aa-4029-9a3b-26f79d330e72%40manitou-mail.org>) + */ +#if U_ICU_VERSION_MAJOR_NUM >= 53 +#define HAVE_UCOL_STRCOLLUTF8 1 +#else +#undef HAVE_UCOL_STRCOLLUTF8 +#endif +#endif + +/* + * We define our own wrapper around locale_t so we can keep the same + * function signatures for all builds, while not having to create a + * fake version of the standard type locale_t in the global namespace. + * pg_locale_t is occasionally checked for truth, so make it a pointer. + */ +struct pg_locale_struct +{ + char provider; + bool deterministic; + char *collate; + char *ctype; + union + { +#ifdef HAVE_LOCALE_T + struct + { + locale_t lt; + } libc; +#endif +#ifdef USE_ICU + struct + { + UCollator *ucol; + } icu; +#endif + int dummy; /* in case we have neither LOCALE_T nor ICU */ + } info; +}; + +#ifdef USE_ICU +extern int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes); +extern int32_t icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar); +#endif + +#endif /* _PG_LOCALE_INTERNAL_ */ -- 2.34.1