mirror of
https://github.com/postgres/postgres.git
synced 2025-05-27 00:04:24 -04:00
Add support for collation attributes on older ICU versions
Starting in ICU 54, collation customization attributes can be specified in the locale string, for example "@colStrength=primary;colCaseLevel=yes". Add support for this for older ICU versions as well, by adding some minimal parsing of the attributes in the locale string and calling ucol_setAttribute() on them. This is essentially what newer ICU versions do internally in ucol_open(). This way we can offer this functionality in a consistent way in all ICU versions supported by PostgreSQL. Also add some tests for ICU collation customization. Reported-by: Daniel Verite <daniel@manitou-mail.org> Discussion: https://www.postgresql.org/message-id/0270ebd4-f67c-8774-1a5a-91adfb9bb41f@2ndquadrant.com
This commit is contained in:
parent
042162d628
commit
b8f9a2a69a
@ -58,6 +58,7 @@
|
||||
#include "catalog/pg_control.h"
|
||||
#include "mb/pg_wchar.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/formatting.h"
|
||||
#include "utils/hsearch.h"
|
||||
#include "utils/lsyscache.h"
|
||||
#include "utils/memutils.h"
|
||||
@ -132,6 +133,9 @@ static HTAB *collation_cache = NULL;
|
||||
static char *IsoLocaleName(const char *); /* MSVC specific */
|
||||
#endif
|
||||
|
||||
#ifdef USE_ICU
|
||||
static void icu_set_collation_attributes(UCollator *collator, const char *loc);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* pg_perm_setlocale
|
||||
@ -1380,6 +1384,9 @@ pg_newlocale_from_collation(Oid collid)
|
||||
(errmsg("could not open collator for locale \"%s\": %s",
|
||||
collcollate, u_errorName(status))));
|
||||
|
||||
if (U_ICU_VERSION_MAJOR_NUM < 54)
|
||||
icu_set_collation_attributes(collator, collcollate);
|
||||
|
||||
/* We will leak this string if we get an error below :-( */
|
||||
result.info.icu.locale = MemoryContextStrdup(TopMemoryContext,
|
||||
collcollate);
|
||||
@ -1588,6 +1595,103 @@ icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
|
||||
return len_result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Parse collation attributes and apply them to the open collator. This takes
|
||||
* a string like "und@colStrength=primary;colCaseLevel=yes" and parses and
|
||||
* applies the key-value arguments.
|
||||
*
|
||||
* Starting with ICU version 54, the attributes are processed automatically by
|
||||
* ucol_open(), so this is only necessary for emulating this behavior on older
|
||||
* versions.
|
||||
*/
|
||||
pg_attribute_unused()
|
||||
static void
|
||||
icu_set_collation_attributes(UCollator *collator, const char *loc)
|
||||
{
|
||||
char *str = asc_tolower(loc, strlen(loc));
|
||||
|
||||
str = strchr(str, '@');
|
||||
if (!str)
|
||||
return;
|
||||
str++;
|
||||
|
||||
for (char *token = strtok(str, ";"); token; token = strtok(NULL, ";"))
|
||||
{
|
||||
char *e = strchr(token, '=');
|
||||
|
||||
if (e)
|
||||
{
|
||||
char *name;
|
||||
char *value;
|
||||
UColAttribute uattr = -1;
|
||||
UColAttributeValue uvalue = -1;
|
||||
UErrorCode status;
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
|
||||
*e = '\0';
|
||||
name = token;
|
||||
value = e + 1;
|
||||
|
||||
/*
|
||||
* See attribute name and value lists in ICU i18n/coll.cpp
|
||||
*/
|
||||
if (strcmp(name, "colstrength") == 0)
|
||||
uattr = UCOL_STRENGTH;
|
||||
else if (strcmp(name, "colbackwards") == 0)
|
||||
uattr = UCOL_FRENCH_COLLATION;
|
||||
else if (strcmp(name, "colcaselevel") == 0)
|
||||
uattr = UCOL_CASE_LEVEL;
|
||||
else if (strcmp(name, "colcasefirst") == 0)
|
||||
uattr = UCOL_CASE_FIRST;
|
||||
else if (strcmp(name, "colalternate") == 0)
|
||||
uattr = UCOL_ALTERNATE_HANDLING;
|
||||
else if (strcmp(name, "colnormalization") == 0)
|
||||
uattr = UCOL_NORMALIZATION_MODE;
|
||||
else if (strcmp(name, "colnumeric") == 0)
|
||||
uattr = UCOL_NUMERIC_COLLATION;
|
||||
/* ignore if unknown */
|
||||
|
||||
if (strcmp(value, "primary") == 0)
|
||||
uvalue = UCOL_PRIMARY;
|
||||
else if (strcmp(value, "secondary") == 0)
|
||||
uvalue = UCOL_SECONDARY;
|
||||
else if (strcmp(value, "tertiary") == 0)
|
||||
uvalue = UCOL_TERTIARY;
|
||||
else if (strcmp(value, "quaternary") == 0)
|
||||
uvalue = UCOL_QUATERNARY;
|
||||
else if (strcmp(value, "identical") == 0)
|
||||
uvalue = UCOL_IDENTICAL;
|
||||
else if (strcmp(value, "no") == 0)
|
||||
uvalue = UCOL_OFF;
|
||||
else if (strcmp(value, "yes") == 0)
|
||||
uvalue = UCOL_ON;
|
||||
else if (strcmp(value, "shifted") == 0)
|
||||
uvalue = UCOL_SHIFTED;
|
||||
else if (strcmp(value, "non-ignorable") == 0)
|
||||
uvalue = UCOL_NON_IGNORABLE;
|
||||
else if (strcmp(value, "lower") == 0)
|
||||
uvalue = UCOL_LOWER_FIRST;
|
||||
else if (strcmp(value, "upper") == 0)
|
||||
uvalue = UCOL_UPPER_FIRST;
|
||||
else
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
|
||||
if (uattr != -1 && uvalue != -1)
|
||||
ucol_setAttribute(collator, uattr, uvalue, &status);
|
||||
|
||||
/*
|
||||
* Pretend the error came from ucol_open(), for consistent error
|
||||
* message across ICU versions.
|
||||
*/
|
||||
if (U_FAILURE(status))
|
||||
ereport(ERROR,
|
||||
(errmsg("could not open collator for locale \"%s\": %s",
|
||||
loc, u_errorName(status))));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* USE_ICU */
|
||||
|
||||
/*
|
||||
|
@ -1100,6 +1100,45 @@ select textrange_en_us('A','Z') @> 'b'::text;
|
||||
|
||||
drop type textrange_c;
|
||||
drop type textrange_en_us;
|
||||
-- test ICU collation customization
|
||||
CREATE COLLATION testcoll_ignore_accents (provider = icu, locale = '@colStrength=primary;colCaseLevel=yes');
|
||||
SELECT 'aaá' > 'AAA' COLLATE "und-x-icu", 'aaá' < 'AAA' COLLATE testcoll_ignore_accents;
|
||||
?column? | ?column?
|
||||
----------+----------
|
||||
t | t
|
||||
(1 row)
|
||||
|
||||
CREATE COLLATION testcoll_backwards (provider = icu, locale = '@colBackwards=yes');
|
||||
SELECT 'coté' < 'côte' COLLATE "und-x-icu", 'coté' > 'côte' COLLATE testcoll_backwards;
|
||||
?column? | ?column?
|
||||
----------+----------
|
||||
t | t
|
||||
(1 row)
|
||||
|
||||
CREATE COLLATION testcoll_lower_first (provider = icu, locale = '@colCaseFirst=lower');
|
||||
CREATE COLLATION testcoll_upper_first (provider = icu, locale = '@colCaseFirst=upper');
|
||||
SELECT 'aaa' < 'AAA' COLLATE testcoll_lower_first, 'aaa' > 'AAA' COLLATE testcoll_upper_first;
|
||||
?column? | ?column?
|
||||
----------+----------
|
||||
t | t
|
||||
(1 row)
|
||||
|
||||
CREATE COLLATION testcoll_shifted (provider = icu, locale = '@colAlternate=shifted');
|
||||
SELECT 'de-luge' < 'deanza' COLLATE "und-x-icu", 'de-luge' > 'deanza' COLLATE testcoll_shifted;
|
||||
?column? | ?column?
|
||||
----------+----------
|
||||
t | t
|
||||
(1 row)
|
||||
|
||||
CREATE COLLATION testcoll_numeric (provider = icu, locale = '@colNumeric=yes');
|
||||
SELECT 'A-21' > 'A-123' COLLATE "und-x-icu", 'A-21' < 'A-123' COLLATE testcoll_numeric;
|
||||
?column? | ?column?
|
||||
----------+----------
|
||||
t | t
|
||||
(1 row)
|
||||
|
||||
CREATE COLLATION testcoll_error1 (provider = icu, locale = '@colNumeric=lower');
|
||||
ERROR: could not open collator for locale "@colNumeric=lower": U_ILLEGAL_ARGUMENT_ERROR
|
||||
-- cleanup
|
||||
SET client_min_messages TO warning;
|
||||
DROP SCHEMA collate_tests CASCADE;
|
||||
|
@ -425,6 +425,27 @@ drop type textrange_c;
|
||||
drop type textrange_en_us;
|
||||
|
||||
|
||||
-- test ICU collation customization
|
||||
|
||||
CREATE COLLATION testcoll_ignore_accents (provider = icu, locale = '@colStrength=primary;colCaseLevel=yes');
|
||||
SELECT 'aaá' > 'AAA' COLLATE "und-x-icu", 'aaá' < 'AAA' COLLATE testcoll_ignore_accents;
|
||||
|
||||
CREATE COLLATION testcoll_backwards (provider = icu, locale = '@colBackwards=yes');
|
||||
SELECT 'coté' < 'côte' COLLATE "und-x-icu", 'coté' > 'côte' COLLATE testcoll_backwards;
|
||||
|
||||
CREATE COLLATION testcoll_lower_first (provider = icu, locale = '@colCaseFirst=lower');
|
||||
CREATE COLLATION testcoll_upper_first (provider = icu, locale = '@colCaseFirst=upper');
|
||||
SELECT 'aaa' < 'AAA' COLLATE testcoll_lower_first, 'aaa' > 'AAA' COLLATE testcoll_upper_first;
|
||||
|
||||
CREATE COLLATION testcoll_shifted (provider = icu, locale = '@colAlternate=shifted');
|
||||
SELECT 'de-luge' < 'deanza' COLLATE "und-x-icu", 'de-luge' > 'deanza' COLLATE testcoll_shifted;
|
||||
|
||||
CREATE COLLATION testcoll_numeric (provider = icu, locale = '@colNumeric=yes');
|
||||
SELECT 'A-21' > 'A-123' COLLATE "und-x-icu", 'A-21' < 'A-123' COLLATE testcoll_numeric;
|
||||
|
||||
CREATE COLLATION testcoll_error1 (provider = icu, locale = '@colNumeric=lower');
|
||||
|
||||
|
||||
-- cleanup
|
||||
SET client_min_messages TO warning;
|
||||
DROP SCHEMA collate_tests CASCADE;
|
||||
|
Loading…
x
Reference in New Issue
Block a user