From 79732b2f94d5097b5ceebd2a22fdbb692c780156 Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Tue, 25 Apr 2023 15:01:55 -0700
Subject: [PATCH v5 6/7] Make LOCALE apply to ICU_LOCALE for CREATE DATABASE.

LOCALE is now an alias for LC_COLLATE, LC_CTYPE, and (if the provider
is ICU) ICU_LOCALE. The ICU provider accepts more locale names than
libc (e.g. language tags and locale names containing collation
attributes), so in some cases LC_COLLATE, LC_CTYPE, and ICU_LOCALE
will still need to be specified separately.

Previously, LOCALE applied only to LC_COLLATE and LC_CTYPE (and
similarly for --locale in initdb and createdb). That could lead to
confusion when the provider is implicit, such as when it is inherited
from the template database, or when ICU is the initdb default (as of
commit 27b62377b4).

Reverts incomplete fix 5cd1a5af4d.

Discussion: https://postgr.es/m/3391932.1682107209@sss.pgh.pa.us
---
 doc/src/sgml/ref/create_database.sgml         |  6 ++--
 doc/src/sgml/ref/createdb.sgml                |  5 +++-
 doc/src/sgml/ref/initdb.sgml                  |  7 +++--
 src/backend/commands/collationcmds.c          |  2 +-
 src/backend/commands/dbcommands.c             | 15 +++++++---
 src/bin/initdb/initdb.c                       | 11 ++++++--
 src/bin/scripts/createdb.c                    | 13 ++++-----
 src/bin/scripts/t/020_createdb.pl             |  4 +--
 src/test/icu/t/010_database.pl                | 23 +++++++++------
 .../regress/expected/collate.icu.utf8.out     | 28 +++++++++----------
 10 files changed, 68 insertions(+), 46 deletions(-)

diff --git a/doc/src/sgml/ref/create_database.sgml b/doc/src/sgml/ref/create_database.sgml
index c730d02e15..dc57ba0c8b 100644
--- a/doc/src/sgml/ref/create_database.sgml
+++ b/doc/src/sgml/ref/create_database.sgml
@@ -145,8 +145,10 @@ CREATE DATABASE <replaceable class="parameter">name</replaceable>
       <term><replaceable class="parameter">locale</replaceable></term>
       <listitem>
        <para>
-        This is a shortcut for setting <symbol>LC_COLLATE</symbol>
-        and <symbol>LC_CTYPE</symbol> at once.
+        This is a shortcut for setting <symbol>LC_COLLATE</symbol>,
+        <symbol>LC_CTYPE</symbol> and (when the locale provider is ICU)
+        <symbol>ICU_LOCALE</symbol> at once. Locale names valid only for ICU
+        must be set separately with <symbol>ICU_LOCALE</symbol>.
        </para>
        <tip>
         <para>
diff --git a/doc/src/sgml/ref/createdb.sgml b/doc/src/sgml/ref/createdb.sgml
index 7c573e848a..7991153ecc 100644
--- a/doc/src/sgml/ref/createdb.sgml
+++ b/doc/src/sgml/ref/createdb.sgml
@@ -124,7 +124,10 @@ PostgreSQL documentation
       <listitem>
        <para>
         Specifies the locale to be used in this database.  This is equivalent
-        to specifying both <option>--lc-collate</option> and <option>--lc-ctype</option>.
+        to setting <option>--lc-collate</option>,
+        <option>--lc-ctype</option>, and <option>--icu-locale</option> to the
+        same value. Some locales are only valid for ICU and must be set with
+        <option>--icu-locale</option>.
        </para>
       </listitem>
      </varlistentry>
diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml
index 76993acdfe..d9ef21c422 100644
--- a/doc/src/sgml/ref/initdb.sgml
+++ b/doc/src/sgml/ref/initdb.sgml
@@ -116,9 +116,10 @@ PostgreSQL documentation
   <para>
    To choose a different locale for the cluster, use the option
    <option>--locale</option>.  There are also individual options
-   <option>--lc-*</option> (see below) to set values for the individual locale
-   categories.  Note that inconsistent settings for different locale
-   categories can give nonsensical results, so this should be used with care.
+   <option>--lc-*</option> and <option>--icu-locale</option> (see below) to
+   set values for the individual locale categories.  Note that inconsistent
+   settings for different locale categories can give nonsensical results, so
+   this should be used with care.
   </para>
 
   <para>
diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c
index ed64e17504..9a83f9f303 100644
--- a/src/backend/commands/collationcmds.c
+++ b/src/backend/commands/collationcmds.c
@@ -302,7 +302,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
 				if (langtag && strcmp(colliculocale, langtag) != 0)
 				{
 					ereport(NOTICE,
-							(errmsg("using standard form \"%s\" for locale \"%s\"",
+							(errmsg("using standard form \"%s\" for ICU locale \"%s\"",
 									langtag, colliculocale)));
 
 					colliculocale = langtag;
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 6dc737aebb..154f20573c 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -1019,7 +1019,12 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 	if (dblocprovider == '\0')
 		dblocprovider = src_locprovider;
 	if (dbiculocale == NULL && dblocprovider == COLLPROVIDER_ICU)
-		dbiculocale = src_iculocale;
+	{
+		if (dlocale && dlocale->arg)
+			dbiculocale = defGetString(dlocale);
+		else
+			dbiculocale = src_iculocale;
+	}
 	if (dbicurules == NULL && dblocprovider == COLLPROVIDER_ICU)
 		dbicurules = src_icurules;
 
@@ -1033,12 +1038,14 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 	if (!check_locale(LC_COLLATE, dbcollate, &canonname))
 		ereport(ERROR,
 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
-				 errmsg("invalid locale name: \"%s\"", dbcollate)));
+				 errmsg("invalid LC_COLLATE locale name: \"%s\"", dbcollate),
+				 errhint("If the locale name is specific to ICU, use ICU_LOCALE.")));
 	dbcollate = canonname;
 	if (!check_locale(LC_CTYPE, dbctype, &canonname))
 		ereport(ERROR,
 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
-				 errmsg("invalid locale name: \"%s\"", dbctype)));
+				 errmsg("invalid LC_CTYPE locale name: \"%s\"", dbctype),
+				 errhint("If the locale name is specific to ICU, use ICU_LOCALE.")));
 	dbctype = canonname;
 
 	check_encoding_locale_matches(encoding, dbcollate, dbctype);
@@ -1094,7 +1101,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 			if (langtag && strcmp(dbiculocale, langtag) != 0)
 			{
 				ereport(NOTICE,
-						(errmsg("using standard form \"%s\" for locale \"%s\"",
+						(errmsg("using standard form \"%s\" for ICU locale \"%s\"",
 								langtag, dbiculocale)));
 
 				dbiculocale = langtag;
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index ea26bf8361..ccb2414fed 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -2157,7 +2157,11 @@ check_locale_name(int category, const char *locale, char **canonname)
 	if (res == NULL)
 	{
 		if (*locale)
-			pg_fatal("invalid locale name \"%s\"", locale);
+		{
+			pg_log_error("invalid locale name \"%s\"", locale);
+			pg_log_error_hint("If the locale name is specific to ICU, use --icu-locale.");
+			exit(1);
+		}
 		else
 		{
 			/*
@@ -2467,7 +2471,7 @@ setlocales(void)
 {
 	char	   *canonname;
 
-	/* set empty lc_* values to locale config if set */
+	/* set empty lc_* and icu_locale values to locale config if set */
 
 	if (locale_provider == COLLPROVIDER_NONE)
 	{
@@ -2499,6 +2503,8 @@ setlocales(void)
 			lc_monetary = locale;
 		if (!lc_messages)
 			lc_messages = locale;
+		if (!icu_locale && locale_provider == COLLPROVIDER_ICU)
+			icu_locale = locale;
 	}
 
 	if (icu_locale && locale_provider == COLLPROVIDER_ICU &&
@@ -3392,7 +3398,6 @@ main(int argc, char *argv[])
 				break;
 			case 8:
 				locale = "C";
-				locale_provider = COLLPROVIDER_LIBC;
 				break;
 			case 9:
 				pwfilename = pg_strdup(optarg);
diff --git a/src/bin/scripts/createdb.c b/src/bin/scripts/createdb.c
index 9caf9190cf..51c4bb3592 100644
--- a/src/bin/scripts/createdb.c
+++ b/src/bin/scripts/createdb.c
@@ -164,14 +164,6 @@ main(int argc, char *argv[])
 			exit(1);
 	}
 
-	if (locale)
-	{
-		if (!lc_ctype)
-			lc_ctype = locale;
-		if (!lc_collate)
-			lc_collate = locale;
-	}
-
 	if (locale_provider && pg_strcasecmp(locale_provider, "icu") == 0 &&
 		icu_locale &&
 		(pg_strcasecmp(icu_locale, "C") == 0 ||
@@ -230,6 +222,11 @@ main(int argc, char *argv[])
 		appendPQExpBuffer(&sql, " STRATEGY %s", fmtId(strategy));
 	if (template)
 		appendPQExpBuffer(&sql, " TEMPLATE %s", fmtId(template));
+	if (locale)
+	{
+		appendPQExpBufferStr(&sql, " LOCALE ");
+		appendStringLiteralConn(&sql, locale, conn);
+	}
 	if (lc_collate)
 	{
 		appendPQExpBufferStr(&sql, " LC_COLLATE ");
diff --git a/src/bin/scripts/t/020_createdb.pl b/src/bin/scripts/t/020_createdb.pl
index eb3682f0fd..81a9931c09 100644
--- a/src/bin/scripts/t/020_createdb.pl
+++ b/src/bin/scripts/t/020_createdb.pl
@@ -167,7 +167,7 @@ $node->command_checks_all(
 	1,
 	[qr/^$/],
 	[
-		qr/^createdb: error: database creation failed: ERROR:  invalid locale name|^createdb: error: database creation failed: ERROR:  new collation \(foo'; SELECT '1\) is incompatible with the collation of the template database/s
+		qr/^createdb: error: database creation failed: ERROR:  invalid LC_COLLATE locale name|^createdb: error: database creation failed: ERROR:  new collation \(foo'; SELECT '1\) is incompatible with the collation of the template database/s
 	],
 	'createdb with incorrect --lc-collate');
 $node->command_checks_all(
@@ -175,7 +175,7 @@ $node->command_checks_all(
 	1,
 	[qr/^$/],
 	[
-		qr/^createdb: error: database creation failed: ERROR:  invalid locale name|^createdb: error: database creation failed: ERROR:  new LC_CTYPE \(foo'; SELECT '1\) is incompatible with the LC_CTYPE of the template database/s
+		qr/^createdb: error: database creation failed: ERROR:  invalid LC_CTYPE locale name|^createdb: error: database creation failed: ERROR:  new LC_CTYPE \(foo'; SELECT '1\) is incompatible with the LC_CTYPE of the template database/s
 	],
 	'createdb with incorrect --lc-ctype');
 
diff --git a/src/test/icu/t/010_database.pl b/src/test/icu/t/010_database.pl
index 715b1bffd6..df4af00afe 100644
--- a/src/test/icu/t/010_database.pl
+++ b/src/test/icu/t/010_database.pl
@@ -51,16 +51,23 @@ b),
 	'sort by explicit collation upper first');
 
 
-# Test error cases in CREATE DATABASE involving locale-related options
+# Test that LOCALE='C' works for ICU
 
-my ($ret, $stdout, $stderr) = $node1->psql('postgres',
-	q{CREATE DATABASE dbicu LOCALE_PROVIDER icu LOCALE 'C' TEMPLATE template0 ENCODING UTF8});
-isnt($ret, 0,
-	"ICU locale must be specified for ICU provider: exit code not 0");
+my $ret1 = $node1->psql('postgres',
+	q{CREATE DATABASE dbicu2 LOCALE_PROVIDER icu LOCALE 'C' TEMPLATE template0 ENCODING UTF8});
+is($ret1, 0,
+	"C locale works for ICU");
+
+# Test that ICU-specific locale string must be specified with ICU_LOCALE,
+# not LOCALE
+
+my ($ret2, $stdout, $stderr) = $node1->psql('postgres',
+	q{CREATE DATABASE dbicu3 LOCALE_PROVIDER icu LOCALE '@colStrength=primary' TEMPLATE template0 ENCODING UTF8});
+isnt($ret2, 0,
+	"ICU-specific locale must be specified with ICU_LOCALE: exit code not 0");
 like(
 	$stderr,
-	qr/ERROR:  ICU locale must be specified/,
-	"ICU locale must be specified for ICU provider: error message");
-
+	qr/ERROR:  invalid LC_COLLATE locale name/,
+	"ICU-specific locale must be specified with ICU_LOCALE: error message");
 
 done_testing();
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index c0437231ad..39f61ca281 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1058,11 +1058,11 @@ CREATE COLLATION testx (provider = icu, locale = '@ASDF'); DROP COLLATION testx;
 WARNING:  could not convert locale name "@ASDF" to language tag: U_ILLEGAL_ARGUMENT_ERROR
 -- test special variants
 CREATE COLLATION testx (provider = icu, locale = '@EURO'); DROP COLLATION testx;
-NOTICE:  using standard form "und-u-cu-eur" for locale "@EURO"
+NOTICE:  using standard form "und-u-cu-eur" for ICU locale "@EURO"
 CREATE COLLATION testx (provider = icu, locale = '@pinyin'); DROP COLLATION testx;
-NOTICE:  using standard form "und-u-co-pinyin" for locale "@pinyin"
+NOTICE:  using standard form "und-u-co-pinyin" for ICU locale "@pinyin"
 CREATE COLLATION testx (provider = icu, locale = '@stroke'); DROP COLLATION testx;
-NOTICE:  using standard form "und-u-co-stroke" for locale "@stroke"
+NOTICE:  using standard form "und-u-co-stroke" for ICU locale "@stroke"
 CREATE COLLATION test4 FROM nonsense;
 ERROR:  collation "nonsense" for encoding "UTF8" does not exist
 CREATE COLLATION test5 FROM test0;
@@ -1211,9 +1211,9 @@ SELECT 'coté' < 'côte' COLLATE "und-x-icu", 'coté' > 'côte' COLLATE testcoll
 (1 row)
 
 CREATE COLLATION testcoll_lower_first (provider = icu, locale = '@colCaseFirst=lower');
-NOTICE:  using standard form "und-u-kf-lower" for locale "@colCaseFirst=lower"
+NOTICE:  using standard form "und-u-kf-lower" for ICU locale "@colCaseFirst=lower"
 CREATE COLLATION testcoll_upper_first (provider = icu, locale = '@colCaseFirst=upper');
-NOTICE:  using standard form "und-u-kf-upper" for locale "@colCaseFirst=upper"
+NOTICE:  using standard form "und-u-kf-upper" for ICU locale "@colCaseFirst=upper"
 SELECT 'aaa' < 'AAA' COLLATE testcoll_lower_first, 'aaa' > 'AAA' COLLATE testcoll_upper_first;
  ?column? | ?column? 
 ----------+----------
@@ -1221,7 +1221,7 @@ SELECT 'aaa' < 'AAA' COLLATE testcoll_lower_first, 'aaa' > 'AAA' COLLATE testcol
 (1 row)
 
 CREATE COLLATION testcoll_shifted (provider = icu, locale = '@colAlternate=shifted');
-NOTICE:  using standard form "und-u-ka-shifted" for locale "@colAlternate=shifted"
+NOTICE:  using standard form "und-u-ka-shifted" for ICU locale "@colAlternate=shifted"
 SELECT 'de-luge' < 'deanza' COLLATE "und-x-icu", 'de-luge' > 'deanza' COLLATE testcoll_shifted;
  ?column? | ?column? 
 ----------+----------
@@ -1238,12 +1238,12 @@ SELECT 'A-21' > 'A-123' COLLATE "und-x-icu", 'A-21' < 'A-123' COLLATE testcoll_n
 (1 row)
 
 CREATE COLLATION testcoll_error1 (provider = icu, locale = '@colNumeric=lower');
-NOTICE:  using standard form "und-u-kn-lower" for locale "@colNumeric=lower"
+NOTICE:  using standard form "und-u-kn-lower" for ICU locale "@colNumeric=lower"
 ERROR:  could not open collator for locale "und-u-kn-lower": U_ILLEGAL_ARGUMENT_ERROR
 -- test that attributes not handled by icu_set_collation_attributes()
 -- (handled by ucol_open() directly) also work
 CREATE COLLATION testcoll_de_phonebook (provider = icu, locale = 'de@collation=phonebook');
-NOTICE:  using standard form "de-u-co-phonebk" for locale "de@collation=phonebook"
+NOTICE:  using standard form "de-u-co-phonebk" for ICU locale "de@collation=phonebook"
 SELECT 'Goldmann' < 'Götz' COLLATE "de-x-icu", 'Goldmann' > 'Götz' COLLATE testcoll_de_phonebook;
  ?column? | ?column? 
 ----------+----------
@@ -1252,7 +1252,7 @@ SELECT 'Goldmann' < 'Götz' COLLATE "de-x-icu", 'Goldmann' > 'Götz' COLLATE tes
 
 -- rules
 CREATE COLLATION testcoll_rules1 (provider = icu, locale = '', rules = '&a < g');
-NOTICE:  using standard form "und" for locale ""
+NOTICE:  using standard form "und" for ICU locale ""
 CREATE TABLE test7 (a text);
 -- example from https://unicode-org.github.io/icu/userguide/collation/customization/#syntax
 INSERT INTO test7 VALUES ('Abernathy'), ('apple'), ('bird'), ('Boston'), ('Graham'), ('green');
@@ -1280,13 +1280,13 @@ SELECT * FROM test7 ORDER BY a COLLATE testcoll_rules1;
 
 DROP TABLE test7;
 CREATE COLLATION testcoll_rulesx (provider = icu, locale = '', rules = '!!wrong!!');
-NOTICE:  using standard form "und" for locale ""
+NOTICE:  using standard form "und" for ICU locale ""
 ERROR:  could not open collator for locale "und" with rules "!!wrong!!": U_INVALID_FORMAT_ERROR
 -- nondeterministic collations
 CREATE COLLATION ctest_det (provider = icu, locale = '', deterministic = true);
-NOTICE:  using standard form "und" for locale ""
+NOTICE:  using standard form "und" for ICU locale ""
 CREATE COLLATION ctest_nondet (provider = icu, locale = '', deterministic = false);
-NOTICE:  using standard form "und" for locale ""
+NOTICE:  using standard form "und" for ICU locale ""
 CREATE TABLE test6 (a int, b text);
 -- same string in different normal forms
 INSERT INTO test6 VALUES (1, U&'\00E4bc');
@@ -1336,9 +1336,9 @@ SELECT * FROM test6a WHERE b = ARRAY['äbc'] COLLATE ctest_nondet;
 (2 rows)
 
 CREATE COLLATION case_sensitive (provider = icu, locale = '');
-NOTICE:  using standard form "und" for locale ""
+NOTICE:  using standard form "und" for ICU locale ""
 CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=secondary', deterministic = false);
-NOTICE:  using standard form "und-u-ks-level2" for locale "@colStrength=secondary"
+NOTICE:  using standard form "und-u-ks-level2" for ICU locale "@colStrength=secondary"
 SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
  ?column? | ?column? 
 ----------+----------
-- 
2.34.1

