diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml
index f2a4acc1150..44e43503a61 100644
--- a/doc/src/sgml/charset.sgml
+++ b/doc/src/sgml/charset.sgml
@@ -664,13 +664,6 @@ SELECT a COLLATE "C" < b COLLATE "POSIX" FROM test1;
-
- de-u-co-phonebk-x-icu
-
- German collation, phone book variant
-
-
-
de-AT-x-icu
@@ -683,13 +676,6 @@ SELECT a COLLATE "C" < b COLLATE "POSIX" FROM test1;
-
- de-AT-u-co-phonebk-x-icu
-
- German collation for Austria, phone book variant
-
-
-
und-x-icu (for "undefined")
@@ -709,6 +695,90 @@ SELECT a COLLATE "C" < b COLLATE "POSIX" FROM test1;
will draw an error along the lines of collation "de-x-icu" for
encoding "WIN874" does not exist.
+
+
+ ICU allows collations to be customized beyond the basic language+country
+ set that is preloaded by initdb. Users are encouraged
+ to define their own collation objects that make use of these facilities to
+ suit the sorting behavior to their requirements. Here are some examples:
+
+
+
+ CREATE COLLATION "de-u-co-phonebk-x-icu" (provider = icu, locale = 'de-u-co-phonebk')
+
+ German collation with phone book collation type
+
+
+
+
+ CREATE COLLATION "und-u-co-emoji-x-icu" (provider = icu, locale = 'und-u-co-emoji')
+
+
+ Root collation with Emoji collation type, per Unicode Technical Standard #51
+
+
+
+
+
+ CREATE COLLATION digitslast (provider = icu, locale = 'en-u-kr-latn-digit')
+
+
+ Sort digits after Latin letters. (The default is digits before letters.)
+
+
+
+
+
+ CREATE COLLATION upperfirst (provider = icu, locale = 'en-u-kf-upper')
+
+
+ Sort upper-case letters before lower-case letters. (The default is
+ lower-case letters first.)
+
+
+
+
+
+ CREATE COLLATION special (provider = icu, locale = 'en-u-kf-upper-kr-latn-digit')
+
+
+ Combines both of the above options.
+
+
+
+
+
+ CREATE COLLATION numeric (provider = icu, locale = 'en-u-kn-true')
+
+
+ Numeric ordering, sorts sequences of digits by their numeric value,
+ for example: A-21 < A-123
+ (also known as natural sort).
+
+
+
+
+
+ See Unicode
+ Technical Standard #35
+ and BCP 47 for
+ details. The list of possible collation types (co
+ subtag) can be found in
+ the CLDR
+ repository.
+ The ICU Locale
+ Explorer can be used to check the details of a particular locale
+ definition.
+
+
+
+ Note that while this system allows creating collations that "ignore
+ case" or "ignore accents" or similar (using
+ the ks key), PostgreSQL does not at the moment allow
+ such collations to act in a truly case- or accent-insensitive manner. Any
+ strings that compare equal according to the collation but are not
+ byte-wise equal will be sorted according to their byte values.
+
diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c
index d36ce535604..9437731276f 100644
--- a/src/backend/commands/collationcmds.c
+++ b/src/backend/commands/collationcmds.c
@@ -687,30 +687,11 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
*/
for (i = -1; i < uloc_countAvailable(); i++)
{
- /*
- * In ICU 4.2, ucol_getKeywordValuesForLocale() sometimes returns
- * values that will not be accepted by uloc_toLanguageTag(). Skip
- * loading keyword variants in that version. (Both
- * ucol_getKeywordValuesForLocale() and uloc_toLanguageTag() are
- * new in ICU 4.2, so older versions are not supported at all.)
- *
- * XXX We have no information about ICU 4.3 through 4.7, but we
- * know the code below works with 4.8.
- */
-#if U_ICU_VERSION_MAJOR_NUM > 4 || (U_ICU_VERSION_MAJOR_NUM == 4 && U_ICU_VERSION_MINOR_NUM > 2)
-#define LOAD_ICU_KEYWORD_VARIANTS
-#endif
-
const char *name;
char *langtag;
char *icucomment;
const char *collcollate;
Oid collid;
-#ifdef LOAD_ICU_KEYWORD_VARIANTS
- UEnumeration *en;
- UErrorCode status;
- const char *val;
-#endif
if (i == -1)
name = ""; /* ICU root locale */
@@ -744,58 +725,6 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
CreateComments(collid, CollationRelationId, 0,
icucomment);
}
-
- /*
- * Add keyword variants, if enabled.
- */
-#ifdef LOAD_ICU_KEYWORD_VARIANTS
- status = U_ZERO_ERROR;
- en = ucol_getKeywordValuesForLocale("collation", name, TRUE, &status);
- if (U_FAILURE(status))
- ereport(ERROR,
- (errmsg("could not get keyword values for locale \"%s\": %s",
- name, u_errorName(status))));
-
- status = U_ZERO_ERROR;
- uenum_reset(en, &status);
- while ((val = uenum_next(en, NULL, &status)))
- {
- char *localeid = psprintf("%s@collation=%s", name, val);
-
- langtag = get_icu_language_tag(localeid);
- collcollate = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : localeid;
-
- /*
- * Be paranoid about not allowing any non-ASCII strings into
- * pg_collation
- */
- if (!is_all_ascii(langtag) || !is_all_ascii(collcollate))
- continue;
-
- collid = CollationCreate(psprintf("%s-x-icu", langtag),
- nspid, GetUserId(),
- COLLPROVIDER_ICU, -1,
- collcollate, collcollate,
- get_collation_actual_version(COLLPROVIDER_ICU, collcollate),
- true, true);
- if (OidIsValid(collid))
- {
- ncreated++;
-
- CommandCounterIncrement();
-
- icucomment = get_icu_locale_comment(localeid);
- if (icucomment)
- CreateComments(collid, CollationRelationId, 0,
- icucomment);
- }
- }
- if (U_FAILURE(status))
- ereport(ERROR,
- (errmsg("could not get keyword values for locale \"%s\": %s",
- name, u_errorName(status))));
- uenum_close(en);
-#endif /* LOAD_ICU_KEYWORD_VARIANTS */
}
}
#endif /* USE_ICU */