mirror of
https://github.com/postgres/postgres.git
synced 2025-05-29 00:03:09 -04:00
Add support for collation attributes on older ICU versions
Starting in ICU 54, collation customization attributes can be specified in the locale string, for example "@colStrength=primary;colCaseLevel=yes". Add support for this for older ICU versions as well, by adding some minimal parsing of the attributes in the locale string and calling ucol_setAttribute() on them. This is essentially what newer ICU versions do internally in ucol_open(). This way we can offer this functionality in a consistent way in all ICU versions supported by PostgreSQL. Also add some tests for ICU collation customization. Reported-by: Daniel Verite <daniel@manitou-mail.org> Discussion: https://www.postgresql.org/message-id/0270ebd4-f67c-8774-1a5a-91adfb9bb41f@2ndquadrant.com
This commit is contained in:
parent
042162d628
commit
b8f9a2a69a
@ -58,6 +58,7 @@
|
|||||||
#include "catalog/pg_control.h"
|
#include "catalog/pg_control.h"
|
||||||
#include "mb/pg_wchar.h"
|
#include "mb/pg_wchar.h"
|
||||||
#include "utils/builtins.h"
|
#include "utils/builtins.h"
|
||||||
|
#include "utils/formatting.h"
|
||||||
#include "utils/hsearch.h"
|
#include "utils/hsearch.h"
|
||||||
#include "utils/lsyscache.h"
|
#include "utils/lsyscache.h"
|
||||||
#include "utils/memutils.h"
|
#include "utils/memutils.h"
|
||||||
@ -132,6 +133,9 @@ static HTAB *collation_cache = NULL;
|
|||||||
static char *IsoLocaleName(const char *); /* MSVC specific */
|
static char *IsoLocaleName(const char *); /* MSVC specific */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef USE_ICU
|
||||||
|
static void icu_set_collation_attributes(UCollator *collator, const char *loc);
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* pg_perm_setlocale
|
* pg_perm_setlocale
|
||||||
@ -1380,6 +1384,9 @@ pg_newlocale_from_collation(Oid collid)
|
|||||||
(errmsg("could not open collator for locale \"%s\": %s",
|
(errmsg("could not open collator for locale \"%s\": %s",
|
||||||
collcollate, u_errorName(status))));
|
collcollate, u_errorName(status))));
|
||||||
|
|
||||||
|
if (U_ICU_VERSION_MAJOR_NUM < 54)
|
||||||
|
icu_set_collation_attributes(collator, collcollate);
|
||||||
|
|
||||||
/* We will leak this string if we get an error below :-( */
|
/* We will leak this string if we get an error below :-( */
|
||||||
result.info.icu.locale = MemoryContextStrdup(TopMemoryContext,
|
result.info.icu.locale = MemoryContextStrdup(TopMemoryContext,
|
||||||
collcollate);
|
collcollate);
|
||||||
@ -1588,6 +1595,103 @@ icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
|
|||||||
return len_result;
|
return len_result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Parse collation attributes and apply them to the open collator. This takes
|
||||||
|
* a string like "und@colStrength=primary;colCaseLevel=yes" and parses and
|
||||||
|
* applies the key-value arguments.
|
||||||
|
*
|
||||||
|
* Starting with ICU version 54, the attributes are processed automatically by
|
||||||
|
* ucol_open(), so this is only necessary for emulating this behavior on older
|
||||||
|
* versions.
|
||||||
|
*/
|
||||||
|
pg_attribute_unused()
|
||||||
|
static void
|
||||||
|
icu_set_collation_attributes(UCollator *collator, const char *loc)
|
||||||
|
{
|
||||||
|
char *str = asc_tolower(loc, strlen(loc));
|
||||||
|
|
||||||
|
str = strchr(str, '@');
|
||||||
|
if (!str)
|
||||||
|
return;
|
||||||
|
str++;
|
||||||
|
|
||||||
|
for (char *token = strtok(str, ";"); token; token = strtok(NULL, ";"))
|
||||||
|
{
|
||||||
|
char *e = strchr(token, '=');
|
||||||
|
|
||||||
|
if (e)
|
||||||
|
{
|
||||||
|
char *name;
|
||||||
|
char *value;
|
||||||
|
UColAttribute uattr = -1;
|
||||||
|
UColAttributeValue uvalue = -1;
|
||||||
|
UErrorCode status;
|
||||||
|
|
||||||
|
status = U_ZERO_ERROR;
|
||||||
|
|
||||||
|
*e = '\0';
|
||||||
|
name = token;
|
||||||
|
value = e + 1;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* See attribute name and value lists in ICU i18n/coll.cpp
|
||||||
|
*/
|
||||||
|
if (strcmp(name, "colstrength") == 0)
|
||||||
|
uattr = UCOL_STRENGTH;
|
||||||
|
else if (strcmp(name, "colbackwards") == 0)
|
||||||
|
uattr = UCOL_FRENCH_COLLATION;
|
||||||
|
else if (strcmp(name, "colcaselevel") == 0)
|
||||||
|
uattr = UCOL_CASE_LEVEL;
|
||||||
|
else if (strcmp(name, "colcasefirst") == 0)
|
||||||
|
uattr = UCOL_CASE_FIRST;
|
||||||
|
else if (strcmp(name, "colalternate") == 0)
|
||||||
|
uattr = UCOL_ALTERNATE_HANDLING;
|
||||||
|
else if (strcmp(name, "colnormalization") == 0)
|
||||||
|
uattr = UCOL_NORMALIZATION_MODE;
|
||||||
|
else if (strcmp(name, "colnumeric") == 0)
|
||||||
|
uattr = UCOL_NUMERIC_COLLATION;
|
||||||
|
/* ignore if unknown */
|
||||||
|
|
||||||
|
if (strcmp(value, "primary") == 0)
|
||||||
|
uvalue = UCOL_PRIMARY;
|
||||||
|
else if (strcmp(value, "secondary") == 0)
|
||||||
|
uvalue = UCOL_SECONDARY;
|
||||||
|
else if (strcmp(value, "tertiary") == 0)
|
||||||
|
uvalue = UCOL_TERTIARY;
|
||||||
|
else if (strcmp(value, "quaternary") == 0)
|
||||||
|
uvalue = UCOL_QUATERNARY;
|
||||||
|
else if (strcmp(value, "identical") == 0)
|
||||||
|
uvalue = UCOL_IDENTICAL;
|
||||||
|
else if (strcmp(value, "no") == 0)
|
||||||
|
uvalue = UCOL_OFF;
|
||||||
|
else if (strcmp(value, "yes") == 0)
|
||||||
|
uvalue = UCOL_ON;
|
||||||
|
else if (strcmp(value, "shifted") == 0)
|
||||||
|
uvalue = UCOL_SHIFTED;
|
||||||
|
else if (strcmp(value, "non-ignorable") == 0)
|
||||||
|
uvalue = UCOL_NON_IGNORABLE;
|
||||||
|
else if (strcmp(value, "lower") == 0)
|
||||||
|
uvalue = UCOL_LOWER_FIRST;
|
||||||
|
else if (strcmp(value, "upper") == 0)
|
||||||
|
uvalue = UCOL_UPPER_FIRST;
|
||||||
|
else
|
||||||
|
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||||
|
|
||||||
|
if (uattr != -1 && uvalue != -1)
|
||||||
|
ucol_setAttribute(collator, uattr, uvalue, &status);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Pretend the error came from ucol_open(), for consistent error
|
||||||
|
* message across ICU versions.
|
||||||
|
*/
|
||||||
|
if (U_FAILURE(status))
|
||||||
|
ereport(ERROR,
|
||||||
|
(errmsg("could not open collator for locale \"%s\": %s",
|
||||||
|
loc, u_errorName(status))));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#endif /* USE_ICU */
|
#endif /* USE_ICU */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1100,6 +1100,45 @@ select textrange_en_us('A','Z') @> 'b'::text;
|
|||||||
|
|
||||||
drop type textrange_c;
|
drop type textrange_c;
|
||||||
drop type textrange_en_us;
|
drop type textrange_en_us;
|
||||||
|
-- test ICU collation customization
|
||||||
|
CREATE COLLATION testcoll_ignore_accents (provider = icu, locale = '@colStrength=primary;colCaseLevel=yes');
|
||||||
|
SELECT 'aaá' > 'AAA' COLLATE "und-x-icu", 'aaá' < 'AAA' COLLATE testcoll_ignore_accents;
|
||||||
|
?column? | ?column?
|
||||||
|
----------+----------
|
||||||
|
t | t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
CREATE COLLATION testcoll_backwards (provider = icu, locale = '@colBackwards=yes');
|
||||||
|
SELECT 'coté' < 'côte' COLLATE "und-x-icu", 'coté' > 'côte' COLLATE testcoll_backwards;
|
||||||
|
?column? | ?column?
|
||||||
|
----------+----------
|
||||||
|
t | t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
CREATE COLLATION testcoll_lower_first (provider = icu, locale = '@colCaseFirst=lower');
|
||||||
|
CREATE COLLATION testcoll_upper_first (provider = icu, locale = '@colCaseFirst=upper');
|
||||||
|
SELECT 'aaa' < 'AAA' COLLATE testcoll_lower_first, 'aaa' > 'AAA' COLLATE testcoll_upper_first;
|
||||||
|
?column? | ?column?
|
||||||
|
----------+----------
|
||||||
|
t | t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
CREATE COLLATION testcoll_shifted (provider = icu, locale = '@colAlternate=shifted');
|
||||||
|
SELECT 'de-luge' < 'deanza' COLLATE "und-x-icu", 'de-luge' > 'deanza' COLLATE testcoll_shifted;
|
||||||
|
?column? | ?column?
|
||||||
|
----------+----------
|
||||||
|
t | t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
CREATE COLLATION testcoll_numeric (provider = icu, locale = '@colNumeric=yes');
|
||||||
|
SELECT 'A-21' > 'A-123' COLLATE "und-x-icu", 'A-21' < 'A-123' COLLATE testcoll_numeric;
|
||||||
|
?column? | ?column?
|
||||||
|
----------+----------
|
||||||
|
t | t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
CREATE COLLATION testcoll_error1 (provider = icu, locale = '@colNumeric=lower');
|
||||||
|
ERROR: could not open collator for locale "@colNumeric=lower": U_ILLEGAL_ARGUMENT_ERROR
|
||||||
-- cleanup
|
-- cleanup
|
||||||
SET client_min_messages TO warning;
|
SET client_min_messages TO warning;
|
||||||
DROP SCHEMA collate_tests CASCADE;
|
DROP SCHEMA collate_tests CASCADE;
|
||||||
|
@ -425,6 +425,27 @@ drop type textrange_c;
|
|||||||
drop type textrange_en_us;
|
drop type textrange_en_us;
|
||||||
|
|
||||||
|
|
||||||
|
-- test ICU collation customization
|
||||||
|
|
||||||
|
CREATE COLLATION testcoll_ignore_accents (provider = icu, locale = '@colStrength=primary;colCaseLevel=yes');
|
||||||
|
SELECT 'aaá' > 'AAA' COLLATE "und-x-icu", 'aaá' < 'AAA' COLLATE testcoll_ignore_accents;
|
||||||
|
|
||||||
|
CREATE COLLATION testcoll_backwards (provider = icu, locale = '@colBackwards=yes');
|
||||||
|
SELECT 'coté' < 'côte' COLLATE "und-x-icu", 'coté' > 'côte' COLLATE testcoll_backwards;
|
||||||
|
|
||||||
|
CREATE COLLATION testcoll_lower_first (provider = icu, locale = '@colCaseFirst=lower');
|
||||||
|
CREATE COLLATION testcoll_upper_first (provider = icu, locale = '@colCaseFirst=upper');
|
||||||
|
SELECT 'aaa' < 'AAA' COLLATE testcoll_lower_first, 'aaa' > 'AAA' COLLATE testcoll_upper_first;
|
||||||
|
|
||||||
|
CREATE COLLATION testcoll_shifted (provider = icu, locale = '@colAlternate=shifted');
|
||||||
|
SELECT 'de-luge' < 'deanza' COLLATE "und-x-icu", 'de-luge' > 'deanza' COLLATE testcoll_shifted;
|
||||||
|
|
||||||
|
CREATE COLLATION testcoll_numeric (provider = icu, locale = '@colNumeric=yes');
|
||||||
|
SELECT 'A-21' > 'A-123' COLLATE "und-x-icu", 'A-21' < 'A-123' COLLATE testcoll_numeric;
|
||||||
|
|
||||||
|
CREATE COLLATION testcoll_error1 (provider = icu, locale = '@colNumeric=lower');
|
||||||
|
|
||||||
|
|
||||||
-- cleanup
|
-- cleanup
|
||||||
SET client_min_messages TO warning;
|
SET client_min_messages TO warning;
|
||||||
DROP SCHEMA collate_tests CASCADE;
|
DROP SCHEMA collate_tests CASCADE;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user