From b1add0b2b4c9785b56e4dc222a89ec8f43b9c586 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 21 Nov 2025 12:41:47 -0800 Subject: [PATCH v9 05/11] Make regex "max_chr" depend on encoding, not provider. The previous per-provider "max_chr" field was there as a hack to preserve the exact prior behavior, which depended on the provider. Change to depend on the encoding, which makes more sense, and remove the per-provider logic. The only difference is for ICU: previously it always used MAX_SIMPLE_CHR (0x7FF) regardless of the encoding, whereas now it will match libc and use MAX_SIMPLE_CHR for UTF-8, and UCHAR_MAX for other encodings. That's possibly a loss for non-UTF8 multibyte encodings, but a win for single-byte encodings. Regardless, this distinction was not worth the complexity. --- src/backend/regex/regc_pg_locale.c | 18 ++++++++++-------- src/backend/utils/adt/pg_locale_libc.c | 2 -- src/include/utils/pg_locale.h | 6 ------ 3 files changed, 10 insertions(+), 16 deletions(-) diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c index 4698f110a0c..bb0e3f1d139 100644 --- a/src/backend/regex/regc_pg_locale.c +++ b/src/backend/regex/regc_pg_locale.c @@ -320,16 +320,18 @@ regc_ctype_get_cache(regc_wc_probefunc probefunc, int cclasscode) max_chr = (pg_wchar) MAX_SIMPLE_CHR; #endif } + else if (GetDatabaseEncoding() == PG_UTF8) + { + max_chr = (pg_wchar) MAX_SIMPLE_CHR; + } else { - if (pg_regex_locale->ctype->max_chr != 0 && - pg_regex_locale->ctype->max_chr <= MAX_SIMPLE_CHR) - { - max_chr = pg_regex_locale->ctype->max_chr; - pcc->cv.cclasscode = -1; - } - else - max_chr = (pg_wchar) MAX_SIMPLE_CHR; +#if MAX_SIMPLE_CHR >= UCHAR_MAX + max_chr = (pg_wchar) UCHAR_MAX; + pcc->cv.cclasscode = -1; +#else + max_chr = (pg_wchar) MAX_SIMPLE_CHR; +#endif } /* diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c index 942454de4ed..a55167b0697 100644 --- a/src/backend/utils/adt/pg_locale_libc.c +++ 
b/src/backend/utils/adt/pg_locale_libc.c @@ -341,7 +341,6 @@ static const struct ctype_methods ctype_methods_libc_sb = { .char_tolower = char_tolower_libc, .wc_toupper = toupper_libc_sb, .wc_tolower = tolower_libc_sb, - .max_chr = UCHAR_MAX, }; /* @@ -367,7 +366,6 @@ static const struct ctype_methods ctype_methods_libc_other_mb = { .char_tolower = char_tolower_libc, .wc_toupper = toupper_libc_sb, .wc_tolower = tolower_libc_sb, - .max_chr = UCHAR_MAX, }; static const struct ctype_methods ctype_methods_libc_utf8 = { diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index 49fd22bf8eb..40e58cc52b8 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -131,12 +131,6 @@ struct ctype_methods * pg_strlower(). */ char (*char_tolower) (unsigned char ch, pg_locale_t locale); - - /* - * For regex and pattern matching efficiency, the maximum char value - * supported by the above methods. If zero, limit is set by regex code. - */ - pg_wchar max_chr; }; /* -- 2.43.0