From fab2a5c30b560c59f032c468343027a8ca69ac6b Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Thu, 1 Dec 2022 14:41:38 -0800 Subject: [PATCH v4 2/6] Add pg_strxfrm() and pg_strxfrm_prefix(). Callers with a NUL-terminated string should call these functions; callers with a string and length should call the pg_strnxfrm() and pg_strnxfrm_prefix() variants instead. Also remove the TRUST_STRXFRM define, and replace with a developer GUC for easier testing. --- src/backend/access/hash/hashfunc.c | 45 ++-- src/backend/utils/adt/pg_locale.c | 381 ++++++++++++++++++++++++++++ src/backend/utils/adt/varchar.c | 41 ++- src/backend/utils/adt/varlena.c | 142 +++-------- src/backend/utils/misc/guc_tables.c | 11 + src/include/utils/pg_locale.h | 11 + 6 files changed, 481 insertions(+), 150 deletions(-) diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c index f890f79ee1..b8136e496f 100644 --- a/src/backend/access/hash/hashfunc.c +++ b/src/backend/access/hash/hashfunc.c @@ -291,21 +291,19 @@ hashtext(PG_FUNCTION_ARGS) #ifdef USE_ICU if (mylocale->provider == COLLPROVIDER_ICU) { - int32_t ulen = -1; - UChar *uchar = NULL; - Size bsize; - uint8_t *buf; + Size bsize, rsize; + char *buf; + const char *keydata = VARDATA_ANY(key); + size_t keylen = VARSIZE_ANY_EXHDR(key); - ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key)); - - bsize = ucol_getSortKey(mylocale->info.icu.ucol, - uchar, ulen, NULL, 0); + bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale); buf = palloc(bsize); - ucol_getSortKey(mylocale->info.icu.ucol, - uchar, ulen, buf, bsize); - pfree(uchar); - result = hash_any(buf, bsize); + rsize = pg_strnxfrm(buf, bsize, keydata, keylen, mylocale); + if (rsize != bsize) + elog(ERROR, "pg_strnxfrm() returned unexpected result"); + + result = hash_any((uint8_t *) buf, bsize); pfree(buf); } @@ -349,21 +347,20 @@ hashtextextended(PG_FUNCTION_ARGS) #ifdef USE_ICU if (mylocale->provider == COLLPROVIDER_ICU) { - int32_t ulen = -1; - UChar *uchar = NULL; - Size bsize; - uint8_t *buf; + 
Size bsize, rsize; + char *buf; + const char *keydata = VARDATA_ANY(key); + size_t keylen = VARSIZE_ANY_EXHDR(key); - ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key)); - - bsize = ucol_getSortKey(mylocale->info.icu.ucol, - uchar, ulen, NULL, 0); + bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale); buf = palloc(bsize); - ucol_getSortKey(mylocale->info.icu.ucol, - uchar, ulen, buf, bsize); - pfree(uchar); - result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1)); + rsize = pg_strnxfrm(buf, bsize, keydata, keylen, mylocale); + if (rsize != bsize) + elog(ERROR, "pg_strnxfrm() returned unexpected result"); + + result = hash_any_extended((uint8_t *) buf, bsize, + PG_GETARG_INT64(1)); pfree(buf); } diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 6cd629ecb4..663286163e 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -94,6 +94,9 @@ char *locale_monetary; char *locale_numeric; char *locale_time; +/* GUC to enable use of strxfrm() for abbreviated keys */ +bool trust_strxfrm = false; + /* * lc_time localization cache. 
* @@ -2099,6 +2102,390 @@ pg_strncoll(const char *arg1, size_t len1, const char *arg2, size_t len2, } +/* + * pg_strxfrm_libc + * + * Transform nul-terminated 'src' using strxfrm_l() or strxfrm(). Callers + * must check pg_strxfrm_enabled() first: the trust_strxfrm GUC, not a + * compile-time define, decides whether libc's strxfrm() may be used. + */ +static size_t +pg_strxfrm_libc(char *dest, const char *src, size_t destsize, + pg_locale_t locale) +{ + Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + +#ifdef HAVE_LOCALE_T + if (locale) + return strxfrm_l(dest, src, destsize, locale->info.lt); + else +#endif + return strxfrm(dest, src, destsize); +} + +static size_t +pg_strnxfrm_libc(char *dest, const char *src, size_t srclen, size_t destsize, + pg_locale_t locale) +{ + char sbuf[TEXTBUFLEN]; + char *buf = sbuf; + size_t bufsize = srclen + 1; + size_t result; + + Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + + if (bufsize > TEXTBUFLEN) + buf = palloc(bufsize); + + /* nul-terminate arguments */ + memcpy(buf, src, srclen); + buf[srclen] = '\0'; + + result = pg_strxfrm_libc(dest, buf, destsize, locale); + + if (buf != sbuf) + pfree(buf); + + return result; +} + +static size_t +pg_strxfrm_prefix_libc(char *dest, const char *src, size_t destsize, + pg_locale_t locale) +{ + Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + /* unsupported; shouldn't happen */ + elog(ERROR, "collprovider '%c' does not support pg_strxfrm_prefix()", + locale->provider); +} + +static size_t +pg_strnxfrm_prefix_libc(char *dest, const char *src, size_t srclen, + size_t destsize, pg_locale_t locale) +{ + Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + /* unsupported; shouldn't happen */ + elog(ERROR, "collprovider '%c' does not support pg_strnxfrm_prefix()", + locale->provider); +} + +#ifdef USE_ICU + +static size_t +pg_strnxfrm_icu(char *dest, const char *src, size_t srclen, size_t destsize, + pg_locale_t locale) +{ + char sbuf[TEXTBUFLEN]; + char *buf = sbuf; + UChar *uchar; + int32_t ulen; + size_t uchar_bsize; + Size result_bsize; + + Assert(locale->provider == 
COLLPROVIDER_ICU); + + init_icu_converter(); + + ulen = uchar_length(icu_converter, src, srclen); + + uchar_bsize = (ulen + 1) * sizeof(UChar); + + if (uchar_bsize > TEXTBUFLEN) + buf = palloc(uchar_bsize); + + uchar = (UChar *) buf; + + ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen); + + result_bsize = ucol_getSortKey(locale->info.icu.ucol, + uchar, ulen, + (uint8_t *) dest, destsize); + + if (buf != sbuf) + pfree(buf); + + return result_bsize; +} + +static size_t +pg_strxfrm_icu(char *dest, const char *src, size_t destsize, + pg_locale_t locale) +{ + Assert(locale->provider == COLLPROVIDER_ICU); + return pg_strnxfrm_icu(dest, src, -1, destsize, locale); +} + +static size_t +pg_strnxfrm_prefix_icu_no_utf8(char *dest, const char *src, size_t srclen, + size_t destsize, pg_locale_t locale) +{ + char sbuf[TEXTBUFLEN]; + char *buf = sbuf; + UCharIterator iter; + uint32_t state[2]; + UErrorCode status; + int32_t ulen = -1; + UChar *uchar = NULL; + size_t uchar_bsize; + Size result_bsize; + + Assert(locale->provider == COLLPROVIDER_ICU); + Assert(GetDatabaseEncoding() != PG_UTF8); + + init_icu_converter(); + + ulen = uchar_length(icu_converter, src, srclen); + + uchar_bsize = (ulen + 1) * sizeof(UChar); + + if (uchar_bsize > TEXTBUFLEN) + buf = palloc(uchar_bsize); + + uchar = (UChar *) buf; + + ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen); + + uiter_setString(&iter, uchar, ulen); + state[0] = state[1] = 0; /* won't need that again */ + status = U_ZERO_ERROR; + result_bsize = ucol_nextSortKeyPart(locale->info.icu.ucol, + &iter, + state, + (uint8_t *) dest, + destsize, + &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("sort key generation failed: %s", + u_errorName(status)))); + + /* free the conversion buffer if it did not fit in sbuf */ + if (buf != sbuf) + pfree(buf); + + return result_bsize; +} + +static size_t +pg_strnxfrm_prefix_icu(char *dest, const char *src, size_t srclen, + size_t destsize, pg_locale_t locale) +{ + size_t result; + + Assert(locale->provider == COLLPROVIDER_ICU); + + if 
(GetDatabaseEncoding() == PG_UTF8) + { + UCharIterator iter; + uint32_t state[2]; + UErrorCode status; + + uiter_setUTF8(&iter, src, srclen); + state[0] = state[1] = 0; /* won't need that again */ + status = U_ZERO_ERROR; + result = ucol_nextSortKeyPart(locale->info.icu.ucol, + &iter, + state, + (uint8_t *) dest, + destsize, + &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("sort key generation failed: %s", + u_errorName(status)))); + } + else + result = pg_strnxfrm_prefix_icu_no_utf8(dest, src, srclen, destsize, + locale); + + return result; +} + +static size_t +pg_strxfrm_prefix_icu(char *dest, const char *src, size_t destsize, + pg_locale_t locale) +{ + Assert(locale->provider == COLLPROVIDER_ICU); + return pg_strnxfrm_prefix_icu(dest, src, -1, destsize, locale); +} + +#endif + +/* + * Return true if the collation provider supports pg_strxfrm() and + * pg_strnxfrm(); otherwise false. + * + * Unfortunately, it seems that strxfrm() for non-C collations is broken on + * many common platforms; testing of multiple versions of glibc reveals that, + * for many locales, strcoll() and strxfrm() do not return consistent + * results. While no other libc other than Cygwin has so far been shown to + * have a problem, we take the conservative course of action for right now and + * disable this categorically. (Users who are certain this isn't a problem on + * their system can set the developer GUC "trust_strxfrm".) + * + * No similar problem is known for the ICU provider. 
+ */ +bool +pg_strxfrm_enabled(pg_locale_t locale) +{ + if (!locale || locale->provider == COLLPROVIDER_LIBC) + return trust_strxfrm; + else if (locale->provider == COLLPROVIDER_ICU) + return true; + else + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", locale->provider); +} + +/* + * pg_strxfrm + * + * Transforms 'src' to a nul-terminated string stored in 'dest' such that + * ordinary strcmp() on transformed strings is equivalent to pg_strcoll() on + * untransformed strings. + * + * The provided 'src' must be nul-terminated. + * + * If destsize is large enough to hold the result, returns the number of bytes + * copied to 'dest'; otherwise, returns the number of bytes needed to hold the + * result and leaves the contents of 'dest' undefined. If destsize is zero, + * 'dest' may be NULL. + */ +size_t +pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale) +{ + size_t result; + + if (!locale || locale->provider == COLLPROVIDER_LIBC) + result = pg_strxfrm_libc(dest, src, destsize, locale); +#ifdef USE_ICU + else if (locale->provider == COLLPROVIDER_ICU) + result = pg_strxfrm_icu(dest, src, destsize, locale); +#endif + else + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", locale->provider); + + return result; +} + +/* + * pg_strnxfrm + * + * Transforms 'src' to a nul-terminated string stored in 'dest' such that + * ordinary strcmp() on transformed strings is equivalent to pg_strcoll() on + * untransformed strings. + * + * If destsize is large enough to hold the result, returns the number of bytes + * copied to 'dest'; otherwise, returns the number of bytes needed to hold the + * result and leaves the contents of 'dest' undefined. If destsize is zero, + * 'dest' may be NULL. + * + * This function may need to nul-terminate the argument for libc functions; + * so if the caller already has a nul-terminated string, it should call + * pg_strxfrm() instead. 
+ */ +size_t +pg_strnxfrm(char *dest, size_t destsize, const char *src, size_t srclen, + pg_locale_t locale) +{ + size_t result; + + if (!locale || locale->provider == COLLPROVIDER_LIBC) + result = pg_strnxfrm_libc(dest, src, srclen, destsize, locale); +#ifdef USE_ICU + else if (locale->provider == COLLPROVIDER_ICU) + result = pg_strnxfrm_icu(dest, src, srclen, destsize, locale); +#endif + else + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", locale->provider); + + return result; +} + +/* + * Return true if the collation provider supports pg_strxfrm_prefix() and + * pg_strnxfrm_prefix(); otherwise false. + */ +bool +pg_strxfrm_prefix_enabled(pg_locale_t locale) +{ + if (!locale || locale->provider == COLLPROVIDER_LIBC) + return false; + else if (locale->provider == COLLPROVIDER_ICU) + return true; + else + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", locale->provider); +} + +/* + * pg_strxfrm_prefix + * + * Transforms 'src' to a byte sequence stored in 'dest' such that ordinary + * memcmp() on the byte sequence is equivalent to pg_strcoll() on + * untransformed strings. The result is not nul-terminated. + * + * The provided 'src' must be nul-terminated. + * + * If destsize is not large enough to hold the entire result, stores just the + * prefix in 'dest'. Returns the number of bytes actually copied to 'dest'. 
+ */ +size_t +pg_strxfrm_prefix(char *dest, const char *src, size_t destsize, + pg_locale_t locale) +{ + size_t result; + + if (!locale || locale->provider == COLLPROVIDER_LIBC) + result = pg_strxfrm_prefix_libc(dest, src, destsize, locale); +#ifdef USE_ICU + else if (locale->provider == COLLPROVIDER_ICU) + result = pg_strxfrm_prefix_icu(dest, src, destsize, locale); +#endif + else + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", locale->provider); + + return result; +} + +/* + * pg_strnxfrm_prefix + * + * Transforms 'src' to a byte sequence stored in 'dest' such that ordinary + * memcmp() on the byte sequence is equivalent to pg_strcoll() on + * untransformed strings. The result is not nul-terminated. + * + * The provided 'src' need not be nul-terminated; 'srclen' gives its length. + * + * If destsize is not large enough to hold the entire result, stores just the + * prefix in 'dest'. Returns the number of bytes actually copied to 'dest'. + * + * This function may need to nul-terminate the argument for libc functions; + * so if the caller already has a nul-terminated string, it should call + * pg_strxfrm_prefix() instead. 
+ */ +size_t +pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src, + size_t srclen, pg_locale_t locale) +{ + size_t result; + + if (!locale || locale->provider == COLLPROVIDER_LIBC) + result = pg_strnxfrm_prefix_libc(dest, src, srclen, destsize, locale); +#ifdef USE_ICU + else if (locale->provider == COLLPROVIDER_ICU) + result = pg_strnxfrm_prefix_icu(dest, src, srclen, destsize, locale); +#endif + else + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", locale->provider); + + return result; +} + #ifdef USE_ICU static void init_icu_converter(void) diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c index 01a2db6b23..1963d4dc4e 100644 --- a/src/backend/utils/adt/varchar.c +++ b/src/backend/utils/adt/varchar.c @@ -1024,21 +1024,17 @@ hashbpchar(PG_FUNCTION_ARGS) #ifdef USE_ICU if (mylocale->provider == COLLPROVIDER_ICU) { - int32_t ulen = -1; - UChar *uchar = NULL; - Size bsize; - uint8_t *buf; + Size bsize, rsize; + char *buf; - ulen = icu_to_uchar(&uchar, keydata, keylen); - - bsize = ucol_getSortKey(mylocale->info.icu.ucol, - uchar, ulen, NULL, 0); + bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale); buf = palloc(bsize); - ucol_getSortKey(mylocale->info.icu.ucol, - uchar, ulen, buf, bsize); - pfree(uchar); - result = hash_any(buf, bsize); + rsize = pg_strnxfrm(buf, bsize, keydata, keylen, mylocale); + if (rsize != bsize) + elog(ERROR, "pg_strnxfrm() returned unexpected result"); + + result = hash_any((uint8_t *) buf, bsize); pfree(buf); } @@ -1086,21 +1082,18 @@ hashbpcharextended(PG_FUNCTION_ARGS) #ifdef USE_ICU if (mylocale->provider == COLLPROVIDER_ICU) { - int32_t ulen = -1; - UChar *uchar = NULL; - Size bsize; - uint8_t *buf; + Size bsize, rsize; + char *buf; - ulen = icu_to_uchar(&uchar, keydata, keylen); - - bsize = ucol_getSortKey(mylocale->info.icu.ucol, - uchar, ulen, NULL, 0); + bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale); buf = palloc(bsize); - 
ucol_getSortKey(mylocale->info.icu.ucol, - uchar, ulen, buf, bsize); - pfree(uchar); - result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1)); + rsize = pg_strnxfrm(buf, bsize, keydata, keylen, mylocale); + if (rsize != bsize) + elog(ERROR, "pg_strnxfrm() returned unexpected result"); + + result = hash_any_extended((uint8_t *) buf, bsize, + PG_GETARG_INT64(1)); pfree(buf); } diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 52dd0bbba8..8da545841f 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -1889,20 +1889,6 @@ varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid) */ locale = pg_newlocale_from_collation(collid); - /* - * There is a further exception on Windows. When the database - * encoding is UTF-8 and we are not using the C collation, complex - * hacks are required. We don't currently have a comparator that - * handles that case, so we fall back on the slow method of having the - * sort code invoke bttextcmp() (in the case of text) via the fmgr - * trampoline. ICU locales work just the same on Windows, however. - */ -#ifdef WIN32 - if (GetDatabaseEncoding() == PG_UTF8 && - !(locale && locale->provider == COLLPROVIDER_ICU)) - return; -#endif - /* * We use varlenafastcmp_locale except for type NAME. */ @@ -1918,13 +1904,7 @@ varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid) /* * Unfortunately, it seems that abbreviation for non-C collations is - * broken on many common platforms; testing of multiple versions of glibc - * reveals that, for many locales, strcoll() and strxfrm() do not return - * consistent results, which is fatal to this optimization. While no - * other libc other than Cygwin has so far been shown to have a problem, - * we take the conservative course of action for right now and disable - * this categorically. (Users who are certain this isn't a problem on - * their system can define TRUST_STRXFRM.) 
+ * broken on many common platforms; see pg_strxfrm_enabled(). * * Even apart from the risk of broken locales, it's possible that there * are platforms where the use of abbreviated keys should be disabled at @@ -1937,10 +1917,8 @@ varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid) * categorically, we may still want or need to disable it for particular * platforms. */ -#ifndef TRUST_STRXFRM - if (!collate_c && !(locale && locale->provider == COLLPROVIDER_ICU)) + if (!collate_c && !pg_strxfrm_enabled(locale)) abbreviate = false; -#endif /* * If we're using abbreviated keys, or if we're using a locale-aware @@ -2229,6 +2207,7 @@ varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup) static Datum varstr_abbrev_convert(Datum original, SortSupport ssup) { + const size_t max_prefix_bytes = sizeof(Datum); VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra; VarString *authoritative = DatumGetVarStringPP(original); char *authoritative_data = VARDATA_ANY(authoritative); @@ -2241,7 +2220,7 @@ varstr_abbrev_convert(Datum original, SortSupport ssup) pres = (char *) &res; /* memset(), so any non-overwritten bytes are NUL */ - memset(pres, 0, sizeof(Datum)); + memset(pres, 0, max_prefix_bytes); len = VARSIZE_ANY_EXHDR(authoritative); /* Get number of bytes, ignoring trailing spaces */ @@ -2276,14 +2255,10 @@ varstr_abbrev_convert(Datum original, SortSupport ssup) * thing: explicitly consider string length. 
*/ if (sss->collate_c) - memcpy(pres, authoritative_data, Min(len, sizeof(Datum))); + memcpy(pres, authoritative_data, Min(len, max_prefix_bytes)); else { Size bsize; -#ifdef USE_ICU - int32_t ulen = -1; - UChar *uchar = NULL; -#endif /* * We're not using the C collation, so fall back on strxfrm or ICU @@ -2301,7 +2276,7 @@ varstr_abbrev_convert(Datum original, SortSupport ssup) if (sss->last_len1 == len && sss->cache_blob && memcmp(sss->buf1, authoritative_data, len) == 0) { - memcpy(pres, sss->buf2, Min(sizeof(Datum), sss->last_len2)); + memcpy(pres, sss->buf2, Min(max_prefix_bytes, sss->last_len2)); /* No change affecting cardinality, so no hashing required */ goto done; } @@ -2309,81 +2284,51 @@ varstr_abbrev_convert(Datum original, SortSupport ssup) memcpy(sss->buf1, authoritative_data, len); /* - * Just like strcoll(), strxfrm() expects a NUL-terminated string. Not - * necessary for ICU, but doesn't hurt. + * pg_strxfrm() and pg_strxfrm_prefix expect NUL-terminated + * strings. */ sss->buf1[len] = '\0'; sss->last_len1 = len; -#ifdef USE_ICU - /* When using ICU and not UTF8, convert string to UChar. */ - if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU && - GetDatabaseEncoding() != PG_UTF8) - ulen = icu_to_uchar(&uchar, sss->buf1, len); -#endif - - /* - * Loop: Call strxfrm() or ucol_getSortKey(), possibly enlarge buffer, - * and try again. Both of these functions have the result buffer - * content undefined if the result did not fit, so we need to retry - * until everything fits, even though we only need the first few bytes - * in the end. When using ucol_nextSortKeyPart(), however, we only - * ask for as many bytes as we actually need. - */ - for (;;) + if (pg_strxfrm_prefix_enabled(sss->locale)) { -#ifdef USE_ICU - if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU) + if (sss->buflen2 < max_prefix_bytes) { - /* - * When using UTF8, use the iteration interface so we only - * need to produce as many bytes as we actually need. 
- */ - if (GetDatabaseEncoding() == PG_UTF8) - { - UCharIterator iter; - uint32_t state[2]; - UErrorCode status; - - uiter_setUTF8(&iter, sss->buf1, len); - state[0] = state[1] = 0; /* won't need that again */ - status = U_ZERO_ERROR; - bsize = ucol_nextSortKeyPart(sss->locale->info.icu.ucol, - &iter, - state, - (uint8_t *) sss->buf2, - Min(sizeof(Datum), sss->buflen2), - &status); - if (U_FAILURE(status)) - ereport(ERROR, - (errmsg("sort key generation failed: %s", - u_errorName(status)))); - } - else - bsize = ucol_getSortKey(sss->locale->info.icu.ucol, - uchar, ulen, - (uint8_t *) sss->buf2, sss->buflen2); + sss->buflen2 = Max(max_prefix_bytes, + Min(sss->buflen2 * 2, MaxAllocSize)); + sss->buf2 = repalloc(sss->buf2, sss->buflen2); } - else -#endif -#ifdef HAVE_LOCALE_T - if (sss->locale && sss->locale->provider == COLLPROVIDER_LIBC) - bsize = strxfrm_l(sss->buf2, sss->buf1, - sss->buflen2, sss->locale->info.lt); - else -#endif - bsize = strxfrm(sss->buf2, sss->buf1, sss->buflen2); - - sss->last_len2 = bsize; - if (bsize < sss->buflen2) - break; + bsize = pg_strxfrm_prefix(sss->buf2, sss->buf1, + max_prefix_bytes, sss->locale); + /* the cache_blob shortcut reads last_len2; keep it current */ + sss->last_len2 = bsize; + } + else + { /* - * Grow buffer and retry. + * Loop: Call pg_strxfrm(), possibly enlarge buffer, and try + * again. The pg_strxfrm() function leaves the result buffer + * content undefined if the result did not fit, so we need to + * retry until everything fits, even though we only need the first + * few bytes in the end. */ - sss->buflen2 = Max(bsize + 1, - Min(sss->buflen2 * 2, MaxAllocSize)); - sss->buf2 = repalloc(sss->buf2, sss->buflen2); + for (;;) + { + bsize = pg_strxfrm(sss->buf2, sss->buf1, sss->buflen2, + sss->locale); + + sss->last_len2 = bsize; + if (bsize < sss->buflen2) + break; + + /* + * Grow buffer and retry. 
+ */ + sss->buflen2 = Max(bsize + 1, + Min(sss->buflen2 * 2, MaxAllocSize)); + sss->buf2 = repalloc(sss->buf2, sss->buflen2); + } } /* @@ -2395,12 +2338,7 @@ varstr_abbrev_convert(Datum original, SortSupport ssup) * (Actually, even if there were NUL bytes in the blob it would be * okay. See remarks on bytea case above.) */ - memcpy(pres, sss->buf2, Min(sizeof(Datum), bsize)); - -#ifdef USE_ICU - if (uchar) - pfree(uchar); -#endif + memcpy(pres, sss->buf2, Min(max_prefix_bytes, bsize)); } /* diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 1bf14eec66..c4a6c5cb83 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -1933,6 +1933,17 @@ struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, + { + {"trust_strxfrm", PGC_USERSET, DEVELOPER_OPTIONS, + gettext_noop("Allow use of strxfrm() for abbreviated keys optimization for libc provider."), + NULL, + GUC_NOT_IN_SAMPLE + }, + &trust_strxfrm, + false, + NULL, NULL, NULL + }, + { {"data_sync_retry", PGC_POSTMASTER, ERROR_HANDLING_OPTIONS, gettext_noop("Whether to continue running after a failure to sync data files."), diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index bf70ae08ca..2bd96ab7e1 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -40,6 +40,7 @@ extern PGDLLIMPORT char *locale_messages; extern PGDLLIMPORT char *locale_monetary; extern PGDLLIMPORT char *locale_numeric; extern PGDLLIMPORT char *locale_time; +extern PGDLLIMPORT bool trust_strxfrm; /* lc_time localization cache */ extern PGDLLIMPORT char *localized_abbrev_days[]; @@ -103,6 +104,16 @@ extern char *get_collation_actual_version(char collprovider, const char *collcol extern int pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale); extern int pg_strncoll(const char *arg1, size_t len1, const char *arg2, size_t len2, pg_locale_t locale); +extern bool pg_strxfrm_enabled(pg_locale_t locale); 
+extern size_t pg_strxfrm(char *dest, const char *src, size_t destsize, + pg_locale_t locale); +extern size_t pg_strnxfrm(char *dest, size_t destsize, const char *src, + size_t srclen, pg_locale_t locale); +extern bool pg_strxfrm_prefix_enabled(pg_locale_t locale); +extern size_t pg_strxfrm_prefix(char *dest, const char *src, size_t destsize, + pg_locale_t locale); +extern size_t pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src, + size_t srclen, pg_locale_t locale); #ifdef USE_ICU extern int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes); -- 2.34.1