mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-26 00:02:18 -04:00 
			
		
		
		
	Control ctype behavior internally with a method table.
Previously, pattern matching and case mapping behavior branched based on the provider. Refactor to use a method table, which is less error-prone. This is also a step toward multiple provider versions, which we may want to support in the future.

Reviewed-by: Andreas Karlsson <andreas@proxel.se>
Reviewed-by: Peter Eisentraut <peter@eisentraut.org>
Discussion: https://postgr.es/m/2830211e1b6e6a2e26d845780b03e125281ea17b.camel%40j-davis.com
This commit is contained in:
		
							parent
							
								
									d81dcc8d62
								
							
						
					
					
						commit
						5a38104b36
					
				| @ -20,58 +20,13 @@ | ||||
| #include "common/unicode_category.h" | ||||
| #include "utils/pg_locale.h" | ||||
| 
 | ||||
| /*
 | ||||
|  * For the libc provider, to provide as much functionality as possible on a | ||||
|  * variety of platforms without going so far as to implement everything from | ||||
|  * scratch, we use several implementation strategies depending on the | ||||
|  * situation: | ||||
|  * | ||||
|  * 1. In C/POSIX collations, we use hard-wired code.  We can't depend on | ||||
|  * the <ctype.h> functions since those will obey LC_CTYPE.  Note that these | ||||
|  * collations don't give a fig about multibyte characters. | ||||
|  * | ||||
|  * 2. When working in UTF8 encoding, we use the <wctype.h> functions. | ||||
|  * This assumes that every platform uses Unicode codepoints directly | ||||
|  * as the wchar_t representation of Unicode.  (XXX: ICU makes this assumption | ||||
|  * even for non-UTF8 encodings, which may be a problem.)  On some platforms | ||||
|  * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF. | ||||
|  * | ||||
|  * 3. In all other encodings, we use the <ctype.h> functions for pg_wchar | ||||
|  * values up to 255, and punt for values above that.  This is 100% correct | ||||
|  * only in single-byte encodings such as LATINn.  However, non-Unicode | ||||
|  * multibyte encodings are mostly Far Eastern character sets for which the | ||||
|  * properties being tested here aren't very relevant for higher code values | ||||
|  * anyway.  The difficulty with using the <wctype.h> functions with | ||||
|  * non-Unicode multibyte encodings is that we can have no certainty that | ||||
|  * the platform's wchar_t representation matches what we do in pg_wchar | ||||
|  * conversions. | ||||
|  * | ||||
|  * As a special case, in the "default" collation, (2) and (3) force ASCII | ||||
|  * letters to follow ASCII upcase/downcase rules, while in a non-default | ||||
|  * collation we just let the library functions do what they will.  The case | ||||
|  * where this matters is treatment of I/i in Turkish, and the behavior is | ||||
|  * meant to match the upper()/lower() SQL functions. | ||||
|  * | ||||
|  * We store the active collation setting in static variables.  In principle | ||||
|  * it could be passed down to here via the regex library's "struct vars" data | ||||
|  * structure; but that would require somewhat invasive changes in the regex | ||||
|  * library, and right now there's no real benefit to be gained from that. | ||||
|  * | ||||
|  * NB: the coding here assumes pg_wchar is an unsigned type. | ||||
|  */ | ||||
| 
 | ||||
| typedef enum | ||||
| { | ||||
| 	PG_REGEX_STRATEGY_C,		/* C locale (encoding independent) */ | ||||
| 	PG_REGEX_STRATEGY_BUILTIN,	/* built-in Unicode semantics */ | ||||
| 	PG_REGEX_STRATEGY_LIBC_WIDE,	/* Use locale_t <wctype.h> functions */ | ||||
| 	PG_REGEX_STRATEGY_LIBC_1BYTE,	/* Use locale_t <ctype.h> functions */ | ||||
| 	PG_REGEX_STRATEGY_ICU,		/* Use ICU uchar.h functions */ | ||||
| } PG_Locale_Strategy; | ||||
| 
 | ||||
| static PG_Locale_Strategy pg_regex_strategy; | ||||
| static pg_locale_t pg_regex_locale; | ||||
| 
 | ||||
| static struct pg_locale_struct dummy_c_locale = { | ||||
| 	.collate_is_c = true, | ||||
| 	.ctype_is_c = true, | ||||
| }; | ||||
| 
 | ||||
| /*
 | ||||
|  * Hard-wired character properties for C locale | ||||
|  */ | ||||
| @ -228,7 +183,6 @@ void | ||||
| pg_set_regex_collation(Oid collation) | ||||
| { | ||||
| 	pg_locale_t locale = 0; | ||||
| 	PG_Locale_Strategy strategy; | ||||
| 
 | ||||
| 	if (!OidIsValid(collation)) | ||||
| 	{ | ||||
| @ -249,8 +203,7 @@ pg_set_regex_collation(Oid collation) | ||||
| 		 * catalog access is available, so we can't call | ||||
| 		 * pg_newlocale_from_collation(). | ||||
| 		 */ | ||||
| 		strategy = PG_REGEX_STRATEGY_C; | ||||
| 		locale = 0; | ||||
| 		locale = &dummy_c_locale; | ||||
| 	} | ||||
| 	else | ||||
| 	{ | ||||
| @ -267,113 +220,41 @@ pg_set_regex_collation(Oid collation) | ||||
| 			 * C/POSIX collations use this path regardless of database | ||||
| 			 * encoding | ||||
| 			 */ | ||||
| 			strategy = PG_REGEX_STRATEGY_C; | ||||
| 			locale = 0; | ||||
| 		} | ||||
| 		else if (locale->provider == COLLPROVIDER_BUILTIN) | ||||
| 		{ | ||||
| 			Assert(GetDatabaseEncoding() == PG_UTF8); | ||||
| 			strategy = PG_REGEX_STRATEGY_BUILTIN; | ||||
| 		} | ||||
| #ifdef USE_ICU | ||||
| 		else if (locale->provider == COLLPROVIDER_ICU) | ||||
| 		{ | ||||
| 			strategy = PG_REGEX_STRATEGY_ICU; | ||||
| 		} | ||||
| #endif | ||||
| 		else | ||||
| 		{ | ||||
| 			Assert(locale->provider == COLLPROVIDER_LIBC); | ||||
| 			if (GetDatabaseEncoding() == PG_UTF8) | ||||
| 				strategy = PG_REGEX_STRATEGY_LIBC_WIDE; | ||||
| 			else | ||||
| 				strategy = PG_REGEX_STRATEGY_LIBC_1BYTE; | ||||
| 			locale = &dummy_c_locale; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	pg_regex_strategy = strategy; | ||||
| 	pg_regex_locale = locale; | ||||
| } | ||||
| 
 | ||||
| static int | ||||
| pg_wc_isdigit(pg_wchar c) | ||||
| { | ||||
| 	switch (pg_regex_strategy) | ||||
| 	{ | ||||
| 		case PG_REGEX_STRATEGY_C: | ||||
| 			return (c <= (pg_wchar) 127 && | ||||
| 					(pg_char_properties[c] & PG_ISDIGIT)); | ||||
| 		case PG_REGEX_STRATEGY_BUILTIN: | ||||
| 			return pg_u_isdigit(c, !pg_regex_locale->info.builtin.casemap_full); | ||||
| 		case PG_REGEX_STRATEGY_LIBC_WIDE: | ||||
| 			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) | ||||
| 				return iswdigit_l((wint_t) c, pg_regex_locale->info.lt); | ||||
| 			/* FALL THRU */ | ||||
| 		case PG_REGEX_STRATEGY_LIBC_1BYTE: | ||||
| 			return (c <= (pg_wchar) UCHAR_MAX && | ||||
| 					isdigit_l((unsigned char) c, pg_regex_locale->info.lt)); | ||||
| 			break; | ||||
| 		case PG_REGEX_STRATEGY_ICU: | ||||
| #ifdef USE_ICU | ||||
| 			return u_isdigit(c); | ||||
| #endif | ||||
| 			break; | ||||
| 	} | ||||
| 	return 0;					/* can't get here, but keep compiler quiet */ | ||||
| 	if (pg_regex_locale->ctype_is_c) | ||||
| 		return (c <= (pg_wchar) 127 && | ||||
| 				(pg_char_properties[c] & PG_ISDIGIT)); | ||||
| 	else | ||||
| 		return pg_regex_locale->ctype->wc_isdigit(c, pg_regex_locale); | ||||
| } | ||||
| 
 | ||||
| static int | ||||
| pg_wc_isalpha(pg_wchar c) | ||||
| { | ||||
| 	switch (pg_regex_strategy) | ||||
| 	{ | ||||
| 		case PG_REGEX_STRATEGY_C: | ||||
| 			return (c <= (pg_wchar) 127 && | ||||
| 					(pg_char_properties[c] & PG_ISALPHA)); | ||||
| 		case PG_REGEX_STRATEGY_BUILTIN: | ||||
| 			return pg_u_isalpha(c); | ||||
| 		case PG_REGEX_STRATEGY_LIBC_WIDE: | ||||
| 			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) | ||||
| 				return iswalpha_l((wint_t) c, pg_regex_locale->info.lt); | ||||
| 			/* FALL THRU */ | ||||
| 		case PG_REGEX_STRATEGY_LIBC_1BYTE: | ||||
| 			return (c <= (pg_wchar) UCHAR_MAX && | ||||
| 					isalpha_l((unsigned char) c, pg_regex_locale->info.lt)); | ||||
| 			break; | ||||
| 		case PG_REGEX_STRATEGY_ICU: | ||||
| #ifdef USE_ICU | ||||
| 			return u_isalpha(c); | ||||
| #endif | ||||
| 			break; | ||||
| 	} | ||||
| 	return 0;					/* can't get here, but keep compiler quiet */ | ||||
| 	if (pg_regex_locale->ctype_is_c) | ||||
| 		return (c <= (pg_wchar) 127 && | ||||
| 				(pg_char_properties[c] & PG_ISALPHA)); | ||||
| 	else | ||||
| 		return pg_regex_locale->ctype->wc_isalpha(c, pg_regex_locale); | ||||
| } | ||||
| 
 | ||||
| static int | ||||
| pg_wc_isalnum(pg_wchar c) | ||||
| { | ||||
| 	switch (pg_regex_strategy) | ||||
| 	{ | ||||
| 		case PG_REGEX_STRATEGY_C: | ||||
| 			return (c <= (pg_wchar) 127 && | ||||
| 					(pg_char_properties[c] & PG_ISALNUM)); | ||||
| 		case PG_REGEX_STRATEGY_BUILTIN: | ||||
| 			return pg_u_isalnum(c, !pg_regex_locale->info.builtin.casemap_full); | ||||
| 		case PG_REGEX_STRATEGY_LIBC_WIDE: | ||||
| 			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) | ||||
| 				return iswalnum_l((wint_t) c, pg_regex_locale->info.lt); | ||||
| 			/* FALL THRU */ | ||||
| 		case PG_REGEX_STRATEGY_LIBC_1BYTE: | ||||
| 			return (c <= (pg_wchar) UCHAR_MAX && | ||||
| 					isalnum_l((unsigned char) c, pg_regex_locale->info.lt)); | ||||
| 			break; | ||||
| 		case PG_REGEX_STRATEGY_ICU: | ||||
| #ifdef USE_ICU | ||||
| 			return u_isalnum(c); | ||||
| #endif | ||||
| 			break; | ||||
| 	} | ||||
| 	return 0;					/* can't get here, but keep compiler quiet */ | ||||
| 	if (pg_regex_locale->ctype_is_c) | ||||
| 		return (c <= (pg_wchar) 127 && | ||||
| 				(pg_char_properties[c] & PG_ISALNUM)); | ||||
| 	else | ||||
| 		return pg_regex_locale->ctype->wc_isalnum(c, pg_regex_locale); | ||||
| } | ||||
| 
 | ||||
| static int | ||||
| @ -388,231 +269,87 @@ pg_wc_isword(pg_wchar c) | ||||
| static int | ||||
| pg_wc_isupper(pg_wchar c) | ||||
| { | ||||
| 	switch (pg_regex_strategy) | ||||
| 	{ | ||||
| 		case PG_REGEX_STRATEGY_C: | ||||
| 			return (c <= (pg_wchar) 127 && | ||||
| 					(pg_char_properties[c] & PG_ISUPPER)); | ||||
| 		case PG_REGEX_STRATEGY_BUILTIN: | ||||
| 			return pg_u_isupper(c); | ||||
| 		case PG_REGEX_STRATEGY_LIBC_WIDE: | ||||
| 			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) | ||||
| 				return iswupper_l((wint_t) c, pg_regex_locale->info.lt); | ||||
| 			/* FALL THRU */ | ||||
| 		case PG_REGEX_STRATEGY_LIBC_1BYTE: | ||||
| 			return (c <= (pg_wchar) UCHAR_MAX && | ||||
| 					isupper_l((unsigned char) c, pg_regex_locale->info.lt)); | ||||
| 			break; | ||||
| 		case PG_REGEX_STRATEGY_ICU: | ||||
| #ifdef USE_ICU | ||||
| 			return u_isupper(c); | ||||
| #endif | ||||
| 			break; | ||||
| 	} | ||||
| 	return 0;					/* can't get here, but keep compiler quiet */ | ||||
| 	if (pg_regex_locale->ctype_is_c) | ||||
| 		return (c <= (pg_wchar) 127 && | ||||
| 				(pg_char_properties[c] & PG_ISUPPER)); | ||||
| 	else | ||||
| 		return pg_regex_locale->ctype->wc_isupper(c, pg_regex_locale); | ||||
| } | ||||
| 
 | ||||
| static int | ||||
| pg_wc_islower(pg_wchar c) | ||||
| { | ||||
| 	switch (pg_regex_strategy) | ||||
| 	{ | ||||
| 		case PG_REGEX_STRATEGY_C: | ||||
| 			return (c <= (pg_wchar) 127 && | ||||
| 					(pg_char_properties[c] & PG_ISLOWER)); | ||||
| 		case PG_REGEX_STRATEGY_BUILTIN: | ||||
| 			return pg_u_islower(c); | ||||
| 		case PG_REGEX_STRATEGY_LIBC_WIDE: | ||||
| 			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) | ||||
| 				return iswlower_l((wint_t) c, pg_regex_locale->info.lt); | ||||
| 			/* FALL THRU */ | ||||
| 		case PG_REGEX_STRATEGY_LIBC_1BYTE: | ||||
| 			return (c <= (pg_wchar) UCHAR_MAX && | ||||
| 					islower_l((unsigned char) c, pg_regex_locale->info.lt)); | ||||
| 			break; | ||||
| 		case PG_REGEX_STRATEGY_ICU: | ||||
| #ifdef USE_ICU | ||||
| 			return u_islower(c); | ||||
| #endif | ||||
| 			break; | ||||
| 	} | ||||
| 	return 0;					/* can't get here, but keep compiler quiet */ | ||||
| 	if (pg_regex_locale->ctype_is_c) | ||||
| 		return (c <= (pg_wchar) 127 && | ||||
| 				(pg_char_properties[c] & PG_ISLOWER)); | ||||
| 	else | ||||
| 		return pg_regex_locale->ctype->wc_islower(c, pg_regex_locale); | ||||
| } | ||||
| 
 | ||||
| static int | ||||
| pg_wc_isgraph(pg_wchar c) | ||||
| { | ||||
| 	switch (pg_regex_strategy) | ||||
| 	{ | ||||
| 		case PG_REGEX_STRATEGY_C: | ||||
| 			return (c <= (pg_wchar) 127 && | ||||
| 					(pg_char_properties[c] & PG_ISGRAPH)); | ||||
| 		case PG_REGEX_STRATEGY_BUILTIN: | ||||
| 			return pg_u_isgraph(c); | ||||
| 		case PG_REGEX_STRATEGY_LIBC_WIDE: | ||||
| 			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) | ||||
| 				return iswgraph_l((wint_t) c, pg_regex_locale->info.lt); | ||||
| 			/* FALL THRU */ | ||||
| 		case PG_REGEX_STRATEGY_LIBC_1BYTE: | ||||
| 			return (c <= (pg_wchar) UCHAR_MAX && | ||||
| 					isgraph_l((unsigned char) c, pg_regex_locale->info.lt)); | ||||
| 			break; | ||||
| 		case PG_REGEX_STRATEGY_ICU: | ||||
| #ifdef USE_ICU | ||||
| 			return u_isgraph(c); | ||||
| #endif | ||||
| 			break; | ||||
| 	} | ||||
| 	return 0;					/* can't get here, but keep compiler quiet */ | ||||
| 	if (pg_regex_locale->ctype_is_c) | ||||
| 		return (c <= (pg_wchar) 127 && | ||||
| 				(pg_char_properties[c] & PG_ISGRAPH)); | ||||
| 	else | ||||
| 		return pg_regex_locale->ctype->wc_isgraph(c, pg_regex_locale); | ||||
| } | ||||
| 
 | ||||
| static int | ||||
| pg_wc_isprint(pg_wchar c) | ||||
| { | ||||
| 	switch (pg_regex_strategy) | ||||
| 	{ | ||||
| 		case PG_REGEX_STRATEGY_C: | ||||
| 			return (c <= (pg_wchar) 127 && | ||||
| 					(pg_char_properties[c] & PG_ISPRINT)); | ||||
| 		case PG_REGEX_STRATEGY_BUILTIN: | ||||
| 			return pg_u_isprint(c); | ||||
| 		case PG_REGEX_STRATEGY_LIBC_WIDE: | ||||
| 			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) | ||||
| 				return iswprint_l((wint_t) c, pg_regex_locale->info.lt); | ||||
| 			/* FALL THRU */ | ||||
| 		case PG_REGEX_STRATEGY_LIBC_1BYTE: | ||||
| 			return (c <= (pg_wchar) UCHAR_MAX && | ||||
| 					isprint_l((unsigned char) c, pg_regex_locale->info.lt)); | ||||
| 			break; | ||||
| 		case PG_REGEX_STRATEGY_ICU: | ||||
| #ifdef USE_ICU | ||||
| 			return u_isprint(c); | ||||
| #endif | ||||
| 			break; | ||||
| 	} | ||||
| 	return 0;					/* can't get here, but keep compiler quiet */ | ||||
| 	if (pg_regex_locale->ctype_is_c) | ||||
| 		return (c <= (pg_wchar) 127 && | ||||
| 				(pg_char_properties[c] & PG_ISPRINT)); | ||||
| 	else | ||||
| 		return pg_regex_locale->ctype->wc_isprint(c, pg_regex_locale); | ||||
| } | ||||
| 
 | ||||
| static int | ||||
| pg_wc_ispunct(pg_wchar c) | ||||
| { | ||||
| 	switch (pg_regex_strategy) | ||||
| 	{ | ||||
| 		case PG_REGEX_STRATEGY_C: | ||||
| 			return (c <= (pg_wchar) 127 && | ||||
| 					(pg_char_properties[c] & PG_ISPUNCT)); | ||||
| 		case PG_REGEX_STRATEGY_BUILTIN: | ||||
| 			return pg_u_ispunct(c, !pg_regex_locale->info.builtin.casemap_full); | ||||
| 		case PG_REGEX_STRATEGY_LIBC_WIDE: | ||||
| 			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) | ||||
| 				return iswpunct_l((wint_t) c, pg_regex_locale->info.lt); | ||||
| 			/* FALL THRU */ | ||||
| 		case PG_REGEX_STRATEGY_LIBC_1BYTE: | ||||
| 			return (c <= (pg_wchar) UCHAR_MAX && | ||||
| 					ispunct_l((unsigned char) c, pg_regex_locale->info.lt)); | ||||
| 			break; | ||||
| 		case PG_REGEX_STRATEGY_ICU: | ||||
| #ifdef USE_ICU | ||||
| 			return u_ispunct(c); | ||||
| #endif | ||||
| 			break; | ||||
| 	} | ||||
| 	return 0;					/* can't get here, but keep compiler quiet */ | ||||
| 	if (pg_regex_locale->ctype_is_c) | ||||
| 		return (c <= (pg_wchar) 127 && | ||||
| 				(pg_char_properties[c] & PG_ISPUNCT)); | ||||
| 	else | ||||
| 		return pg_regex_locale->ctype->wc_ispunct(c, pg_regex_locale); | ||||
| } | ||||
| 
 | ||||
| static int | ||||
| pg_wc_isspace(pg_wchar c) | ||||
| { | ||||
| 	switch (pg_regex_strategy) | ||||
| 	{ | ||||
| 		case PG_REGEX_STRATEGY_C: | ||||
| 			return (c <= (pg_wchar) 127 && | ||||
| 					(pg_char_properties[c] & PG_ISSPACE)); | ||||
| 		case PG_REGEX_STRATEGY_BUILTIN: | ||||
| 			return pg_u_isspace(c); | ||||
| 		case PG_REGEX_STRATEGY_LIBC_WIDE: | ||||
| 			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) | ||||
| 				return iswspace_l((wint_t) c, pg_regex_locale->info.lt); | ||||
| 			/* FALL THRU */ | ||||
| 		case PG_REGEX_STRATEGY_LIBC_1BYTE: | ||||
| 			return (c <= (pg_wchar) UCHAR_MAX && | ||||
| 					isspace_l((unsigned char) c, pg_regex_locale->info.lt)); | ||||
| 			break; | ||||
| 		case PG_REGEX_STRATEGY_ICU: | ||||
| #ifdef USE_ICU | ||||
| 			return u_isspace(c); | ||||
| #endif | ||||
| 			break; | ||||
| 	} | ||||
| 	return 0;					/* can't get here, but keep compiler quiet */ | ||||
| 	if (pg_regex_locale->ctype_is_c) | ||||
| 		return (c <= (pg_wchar) 127 && | ||||
| 				(pg_char_properties[c] & PG_ISSPACE)); | ||||
| 	else | ||||
| 		return pg_regex_locale->ctype->wc_isspace(c, pg_regex_locale); | ||||
| } | ||||
| 
 | ||||
| static pg_wchar | ||||
| pg_wc_toupper(pg_wchar c) | ||||
| { | ||||
| 	switch (pg_regex_strategy) | ||||
| 	if (pg_regex_locale->ctype_is_c) | ||||
| 	{ | ||||
| 		case PG_REGEX_STRATEGY_C: | ||||
| 			if (c <= (pg_wchar) 127) | ||||
| 				return pg_ascii_toupper((unsigned char) c); | ||||
| 			return c; | ||||
| 		case PG_REGEX_STRATEGY_BUILTIN: | ||||
| 			return unicode_uppercase_simple(c); | ||||
| 		case PG_REGEX_STRATEGY_LIBC_WIDE: | ||||
| 			/* force C behavior for ASCII characters, per comments above */ | ||||
| 			if (pg_regex_locale->is_default && c <= (pg_wchar) 127) | ||||
| 				return pg_ascii_toupper((unsigned char) c); | ||||
| 			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) | ||||
| 				return towupper_l((wint_t) c, pg_regex_locale->info.lt); | ||||
| 			/* FALL THRU */ | ||||
| 		case PG_REGEX_STRATEGY_LIBC_1BYTE: | ||||
| 			/* force C behavior for ASCII characters, per comments above */ | ||||
| 			if (pg_regex_locale->is_default && c <= (pg_wchar) 127) | ||||
| 				return pg_ascii_toupper((unsigned char) c); | ||||
| 			if (c <= (pg_wchar) UCHAR_MAX) | ||||
| 				return toupper_l((unsigned char) c, pg_regex_locale->info.lt); | ||||
| 			return c; | ||||
| 		case PG_REGEX_STRATEGY_ICU: | ||||
| #ifdef USE_ICU | ||||
| 			return u_toupper(c); | ||||
| #endif | ||||
| 			break; | ||||
| 		if (c <= (pg_wchar) 127) | ||||
| 			return pg_ascii_toupper((unsigned char) c); | ||||
| 		return c; | ||||
| 	} | ||||
| 	return 0;					/* can't get here, but keep compiler quiet */ | ||||
| 	else | ||||
| 		return pg_regex_locale->ctype->wc_toupper(c, pg_regex_locale); | ||||
| } | ||||
| 
 | ||||
| static pg_wchar | ||||
| pg_wc_tolower(pg_wchar c) | ||||
| { | ||||
| 	switch (pg_regex_strategy) | ||||
| 	if (pg_regex_locale->ctype_is_c) | ||||
| 	{ | ||||
| 		case PG_REGEX_STRATEGY_C: | ||||
| 			if (c <= (pg_wchar) 127) | ||||
| 				return pg_ascii_tolower((unsigned char) c); | ||||
| 			return c; | ||||
| 		case PG_REGEX_STRATEGY_BUILTIN: | ||||
| 			return unicode_lowercase_simple(c); | ||||
| 		case PG_REGEX_STRATEGY_LIBC_WIDE: | ||||
| 			/* force C behavior for ASCII characters, per comments above */ | ||||
| 			if (pg_regex_locale->is_default && c <= (pg_wchar) 127) | ||||
| 				return pg_ascii_tolower((unsigned char) c); | ||||
| 			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) | ||||
| 				return towlower_l((wint_t) c, pg_regex_locale->info.lt); | ||||
| 			/* FALL THRU */ | ||||
| 		case PG_REGEX_STRATEGY_LIBC_1BYTE: | ||||
| 			/* force C behavior for ASCII characters, per comments above */ | ||||
| 			if (pg_regex_locale->is_default && c <= (pg_wchar) 127) | ||||
| 				return pg_ascii_tolower((unsigned char) c); | ||||
| 			if (c <= (pg_wchar) UCHAR_MAX) | ||||
| 				return tolower_l((unsigned char) c, pg_regex_locale->info.lt); | ||||
| 			return c; | ||||
| 		case PG_REGEX_STRATEGY_ICU: | ||||
| #ifdef USE_ICU | ||||
| 			return u_tolower(c); | ||||
| #endif | ||||
| 			break; | ||||
| 		if (c <= (pg_wchar) 127) | ||||
| 			return pg_ascii_tolower((unsigned char) c); | ||||
| 		return c; | ||||
| 	} | ||||
| 	return 0;					/* can't get here, but keep compiler quiet */ | ||||
| 	else | ||||
| 		return pg_regex_locale->ctype->wc_tolower(c, pg_regex_locale); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| @ -738,37 +475,25 @@ pg_ctype_get_cache(pg_wc_probefunc probefunc, int cclasscode) | ||||
| 	 * would always be true for production values of MAX_SIMPLE_CHR, but it's | ||||
| 	 * useful to allow it to be small for testing purposes.) | ||||
| 	 */ | ||||
| 	switch (pg_regex_strategy) | ||||
| 	if (pg_regex_locale->ctype_is_c) | ||||
| 	{ | ||||
| 		case PG_REGEX_STRATEGY_C: | ||||
| #if MAX_SIMPLE_CHR >= 127 | ||||
| 			max_chr = (pg_wchar) 127; | ||||
| 			pcc->cv.cclasscode = -1; | ||||
| 		max_chr = (pg_wchar) 127; | ||||
| 		pcc->cv.cclasscode = -1; | ||||
| #else | ||||
| 			max_chr = (pg_wchar) MAX_SIMPLE_CHR; | ||||
| 		max_chr = (pg_wchar) MAX_SIMPLE_CHR; | ||||
| #endif | ||||
| 			break; | ||||
| 		case PG_REGEX_STRATEGY_BUILTIN: | ||||
| 			max_chr = (pg_wchar) MAX_SIMPLE_CHR; | ||||
| 			break; | ||||
| 		case PG_REGEX_STRATEGY_LIBC_WIDE: | ||||
| 			max_chr = (pg_wchar) MAX_SIMPLE_CHR; | ||||
| 			break; | ||||
| 		case PG_REGEX_STRATEGY_LIBC_1BYTE: | ||||
| #if MAX_SIMPLE_CHR >= UCHAR_MAX | ||||
| 			max_chr = (pg_wchar) UCHAR_MAX; | ||||
| 	} | ||||
| 	else | ||||
| 	{ | ||||
| 		if (pg_regex_locale->ctype->max_chr != 0 && | ||||
| 			pg_regex_locale->ctype->max_chr <= MAX_SIMPLE_CHR) | ||||
| 		{ | ||||
| 			max_chr = pg_regex_locale->ctype->max_chr; | ||||
| 			pcc->cv.cclasscode = -1; | ||||
| #else | ||||
| 		} | ||||
| 		else | ||||
| 			max_chr = (pg_wchar) MAX_SIMPLE_CHR; | ||||
| #endif | ||||
| 			break; | ||||
| 		case PG_REGEX_STRATEGY_ICU: | ||||
| 			max_chr = (pg_wchar) MAX_SIMPLE_CHR; | ||||
| 			break; | ||||
| 		default: | ||||
| 			Assert(false); | ||||
| 			max_chr = 0;		/* can't get here, but keep compiler quiet */ | ||||
| 			break; | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
|  | ||||
| @ -98,7 +98,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale) | ||||
| 	else if (locale->is_default) | ||||
| 		return pg_tolower(c); | ||||
| 	else | ||||
| 		return tolower_l(c, locale->info.lt); | ||||
| 		return char_tolower(c, locale); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| @ -209,7 +209,17 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation) | ||||
| 	 * way. | ||||
| 	 */ | ||||
| 
 | ||||
| 	if (pg_database_encoding_max_length() > 1 || (locale->provider == COLLPROVIDER_ICU)) | ||||
| 	if (locale->ctype_is_c || | ||||
| 		(char_tolower_enabled(locale) && | ||||
| 		 pg_database_encoding_max_length() == 1)) | ||||
| 	{ | ||||
| 		p = VARDATA_ANY(pat); | ||||
| 		plen = VARSIZE_ANY_EXHDR(pat); | ||||
| 		s = VARDATA_ANY(str); | ||||
| 		slen = VARSIZE_ANY_EXHDR(str); | ||||
| 		return SB_IMatchText(s, slen, p, plen, locale); | ||||
| 	} | ||||
| 	else | ||||
| 	{ | ||||
| 		pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation, | ||||
| 													 PointerGetDatum(pat))); | ||||
| @ -224,14 +234,6 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation) | ||||
| 		else | ||||
| 			return MB_MatchText(s, slen, p, plen, 0); | ||||
| 	} | ||||
| 	else | ||||
| 	{ | ||||
| 		p = VARDATA_ANY(pat); | ||||
| 		plen = VARSIZE_ANY_EXHDR(pat); | ||||
| 		s = VARDATA_ANY(str); | ||||
| 		slen = VARSIZE_ANY_EXHDR(str); | ||||
| 		return SB_IMatchText(s, slen, p, plen, locale); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  | ||||
| @ -1495,13 +1495,8 @@ pattern_char_isalpha(char c, bool is_multibyte, | ||||
| { | ||||
| 	if (locale->ctype_is_c) | ||||
| 		return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); | ||||
| 	else if (is_multibyte && IS_HIGHBIT_SET(c)) | ||||
| 		return true; | ||||
| 	else if (locale->provider != COLLPROVIDER_LIBC) | ||||
| 		return IS_HIGHBIT_SET(c) || | ||||
| 			(c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); | ||||
| 	else | ||||
| 		return isalpha_l((unsigned char) c, locale->info.lt); | ||||
| 		return char_is_cased(c, locale); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
|  | ||||
| @ -79,31 +79,6 @@ extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context); | ||||
| extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context); | ||||
| extern char *get_collation_actual_version_libc(const char *collcollate); | ||||
| 
 | ||||
| extern size_t strlower_builtin(char *dst, size_t dstsize, const char *src, | ||||
| 							   ssize_t srclen, pg_locale_t locale); | ||||
| extern size_t strtitle_builtin(char *dst, size_t dstsize, const char *src, | ||||
| 							   ssize_t srclen, pg_locale_t locale); | ||||
| extern size_t strupper_builtin(char *dst, size_t dstsize, const char *src, | ||||
| 							   ssize_t srclen, pg_locale_t locale); | ||||
| extern size_t strfold_builtin(char *dst, size_t dstsize, const char *src, | ||||
| 							  ssize_t srclen, pg_locale_t locale); | ||||
| 
 | ||||
| extern size_t strlower_icu(char *dst, size_t dstsize, const char *src, | ||||
| 						   ssize_t srclen, pg_locale_t locale); | ||||
| extern size_t strtitle_icu(char *dst, size_t dstsize, const char *src, | ||||
| 						   ssize_t srclen, pg_locale_t locale); | ||||
| extern size_t strupper_icu(char *dst, size_t dstsize, const char *src, | ||||
| 						   ssize_t srclen, pg_locale_t locale); | ||||
| extern size_t strfold_icu(char *dst, size_t dstsize, const char *src, | ||||
| 						  ssize_t srclen, pg_locale_t locale); | ||||
| 
 | ||||
| extern size_t strlower_libc(char *dst, size_t dstsize, const char *src, | ||||
| 							ssize_t srclen, pg_locale_t locale); | ||||
| extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src, | ||||
| 							ssize_t srclen, pg_locale_t locale); | ||||
| extern size_t strupper_libc(char *dst, size_t dstsize, const char *src, | ||||
| 							ssize_t srclen, pg_locale_t locale); | ||||
| 
 | ||||
| /* GUC settings */ | ||||
| char	   *locale_messages; | ||||
| char	   *locale_monetary; | ||||
| @ -1092,6 +1067,9 @@ create_pg_locale(Oid collid, MemoryContext context) | ||||
| 	Assert((result->collate_is_c && result->collate == NULL) || | ||||
| 		   (!result->collate_is_c && result->collate != NULL)); | ||||
| 
 | ||||
| 	Assert((result->ctype_is_c && result->ctype == NULL) || | ||||
| 		   (!result->ctype_is_c && result->ctype != NULL)); | ||||
| 
 | ||||
| 	datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion, | ||||
| 							&isnull); | ||||
| 	if (!isnull) | ||||
| @ -1256,77 +1234,31 @@ size_t | ||||
| pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen, | ||||
| 			pg_locale_t locale) | ||||
| { | ||||
| 	if (locale->provider == COLLPROVIDER_BUILTIN) | ||||
| 		return strlower_builtin(dst, dstsize, src, srclen, locale); | ||||
| #ifdef USE_ICU | ||||
| 	else if (locale->provider == COLLPROVIDER_ICU) | ||||
| 		return strlower_icu(dst, dstsize, src, srclen, locale); | ||||
| #endif | ||||
| 	else if (locale->provider == COLLPROVIDER_LIBC) | ||||
| 		return strlower_libc(dst, dstsize, src, srclen, locale); | ||||
| 	else | ||||
| 		/* shouldn't happen */ | ||||
| 		PGLOCALE_SUPPORT_ERROR(locale->provider); | ||||
| 
 | ||||
| 	return 0;					/* keep compiler quiet */ | ||||
| 	return locale->ctype->strlower(dst, dstsize, src, srclen, locale); | ||||
| } | ||||
| 
 | ||||
| size_t | ||||
| pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen, | ||||
| 			pg_locale_t locale) | ||||
| { | ||||
| 	if (locale->provider == COLLPROVIDER_BUILTIN) | ||||
| 		return strtitle_builtin(dst, dstsize, src, srclen, locale); | ||||
| #ifdef USE_ICU | ||||
| 	else if (locale->provider == COLLPROVIDER_ICU) | ||||
| 		return strtitle_icu(dst, dstsize, src, srclen, locale); | ||||
| #endif | ||||
| 	else if (locale->provider == COLLPROVIDER_LIBC) | ||||
| 		return strtitle_libc(dst, dstsize, src, srclen, locale); | ||||
| 	else | ||||
| 		/* shouldn't happen */ | ||||
| 		PGLOCALE_SUPPORT_ERROR(locale->provider); | ||||
| 
 | ||||
| 	return 0;					/* keep compiler quiet */ | ||||
| 	return locale->ctype->strtitle(dst, dstsize, src, srclen, locale); | ||||
| } | ||||
| 
 | ||||
| size_t | ||||
| pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen, | ||||
| 			pg_locale_t locale) | ||||
| { | ||||
| 	if (locale->provider == COLLPROVIDER_BUILTIN) | ||||
| 		return strupper_builtin(dst, dstsize, src, srclen, locale); | ||||
| #ifdef USE_ICU | ||||
| 	else if (locale->provider == COLLPROVIDER_ICU) | ||||
| 		return strupper_icu(dst, dstsize, src, srclen, locale); | ||||
| #endif | ||||
| 	else if (locale->provider == COLLPROVIDER_LIBC) | ||||
| 		return strupper_libc(dst, dstsize, src, srclen, locale); | ||||
| 	else | ||||
| 		/* shouldn't happen */ | ||||
| 		PGLOCALE_SUPPORT_ERROR(locale->provider); | ||||
| 
 | ||||
| 	return 0;					/* keep compiler quiet */ | ||||
| 	return locale->ctype->strupper(dst, dstsize, src, srclen, locale); | ||||
| } | ||||
| 
 | ||||
| size_t | ||||
| pg_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen, | ||||
| 		   pg_locale_t locale) | ||||
| { | ||||
| 	if (locale->provider == COLLPROVIDER_BUILTIN) | ||||
| 		return strfold_builtin(dst, dstsize, src, srclen, locale); | ||||
| #ifdef USE_ICU | ||||
| 	else if (locale->provider == COLLPROVIDER_ICU) | ||||
| 		return strfold_icu(dst, dstsize, src, srclen, locale); | ||||
| #endif | ||||
| 	/* for libc, just use strlower */ | ||||
| 	else if (locale->provider == COLLPROVIDER_LIBC) | ||||
| 		return strlower_libc(dst, dstsize, src, srclen, locale); | ||||
| 	if (locale->ctype->strfold) | ||||
| 		return locale->ctype->strfold(dst, dstsize, src, srclen, locale); | ||||
| 	else | ||||
| 		/* shouldn't happen */ | ||||
| 		PGLOCALE_SUPPORT_ERROR(locale->provider); | ||||
| 
 | ||||
| 	return 0;					/* keep compiler quiet */ | ||||
| 		return locale->ctype->strlower(dst, dstsize, src, srclen, locale); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
| @ -1463,6 +1395,41 @@ pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src, | ||||
| 	return locale->collate->strnxfrm_prefix(dest, destsize, src, srclen, locale); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * char_is_cased() | ||||
|  * | ||||
|  * Fuzzy test of whether the given char is case-varying or not. The argument | ||||
|  * is a single byte, so in a multibyte encoding, just assume any non-ASCII | ||||
|  * char is case-varying. | ||||
|  */ | ||||
| bool | ||||
| char_is_cased(char ch, pg_locale_t locale) | ||||
| { | ||||
| 	return locale->ctype->char_is_cased(ch, locale); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * char_tolower_enabled() | ||||
|  * | ||||
|  * Does the provider support char_tolower()? | ||||
|  */ | ||||
| bool | ||||
| char_tolower_enabled(pg_locale_t locale) | ||||
| { | ||||
| 	return (locale->ctype->char_tolower != NULL); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * char_tolower() | ||||
|  * | ||||
|  * Convert char (single-byte encoding) to lowercase. | ||||
|  */ | ||||
| char | ||||
| char_tolower(unsigned char ch, pg_locale_t locale) | ||||
| { | ||||
| 	return locale->ctype->char_tolower(ch, locale); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Return required encoding ID for the given locale, or -1 if any encoding is | ||||
|  * valid for the locale. | ||||
|  | ||||
| @ -24,15 +24,6 @@ | ||||
| extern pg_locale_t create_pg_locale_builtin(Oid collid, | ||||
| 											MemoryContext context); | ||||
| extern char *get_collation_actual_version_builtin(const char *collcollate); | ||||
| extern size_t strlower_builtin(char *dest, size_t destsize, const char *src, | ||||
| 							   ssize_t srclen, pg_locale_t locale); | ||||
| extern size_t strtitle_builtin(char *dest, size_t destsize, const char *src, | ||||
| 							   ssize_t srclen, pg_locale_t locale); | ||||
| extern size_t strupper_builtin(char *dest, size_t destsize, const char *src, | ||||
| 							   ssize_t srclen, pg_locale_t locale); | ||||
| extern size_t strfold_builtin(char *dest, size_t destsize, const char *src, | ||||
| 							  ssize_t srclen, pg_locale_t locale); | ||||
| 
 | ||||
| 
 | ||||
| struct WordBoundaryState | ||||
| { | ||||
| @ -76,7 +67,7 @@ initcap_wbnext(void *state) | ||||
| 	return wbstate->len; | ||||
| } | ||||
| 
 | ||||
| size_t | ||||
| static size_t | ||||
| strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, | ||||
| 				 pg_locale_t locale) | ||||
| { | ||||
| @ -84,7 +75,7 @@ strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, | ||||
| 							locale->info.builtin.casemap_full); | ||||
| } | ||||
| 
 | ||||
| size_t | ||||
| static size_t | ||||
| strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, | ||||
| 				 pg_locale_t locale) | ||||
| { | ||||
| @ -102,7 +93,7 @@ strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, | ||||
| 							initcap_wbnext, &wbstate); | ||||
| } | ||||
| 
 | ||||
| size_t | ||||
| static size_t | ||||
| strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, | ||||
| 				 pg_locale_t locale) | ||||
| { | ||||
| @ -110,7 +101,7 @@ strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, | ||||
| 							locale->info.builtin.casemap_full); | ||||
| } | ||||
| 
 | ||||
| size_t | ||||
| static size_t | ||||
| strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, | ||||
| 				pg_locale_t locale) | ||||
| { | ||||
| @ -118,6 +109,98 @@ strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, | ||||
| 						   locale->info.builtin.casemap_full); | ||||
| } | ||||
| 
 | ||||
| /* Digit test for the builtin provider, delegated to pg_u_isdigit(). */ | ||||
| static bool | ||||
| wc_isdigit_builtin(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	/* pg_u_isdigit() takes the inverse of the locale's casemap_full flag */ | ||||
| 	bool		not_full = !locale->info.builtin.casemap_full; | ||||
| 
 | ||||
| 	return pg_u_isdigit(wc, not_full); | ||||
| } | ||||
| 
 | ||||
| /* Alphabetic test for the builtin provider; the locale is not consulted. */ | ||||
| static bool | ||||
| wc_isalpha_builtin(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	bool		is_alpha = pg_u_isalpha(wc); | ||||
| 
 | ||||
| 	return is_alpha; | ||||
| } | ||||
| 
 | ||||
| /* Alphanumeric test for the builtin provider, delegated to pg_u_isalnum(). */ | ||||
| static bool | ||||
| wc_isalnum_builtin(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	/* pg_u_isalnum() takes the inverse of the locale's casemap_full flag */ | ||||
| 	bool		not_full = !locale->info.builtin.casemap_full; | ||||
| 
 | ||||
| 	return pg_u_isalnum(wc, not_full); | ||||
| } | ||||
| 
 | ||||
| /* Uppercase test for the builtin provider; the locale is not consulted. */ | ||||
| static bool | ||||
| wc_isupper_builtin(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	bool		is_upper = pg_u_isupper(wc); | ||||
| 
 | ||||
| 	return is_upper; | ||||
| } | ||||
| 
 | ||||
| /* Lowercase test for the builtin provider; the locale is not consulted. */ | ||||
| static bool | ||||
| wc_islower_builtin(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	bool		is_lower = pg_u_islower(wc); | ||||
| 
 | ||||
| 	return is_lower; | ||||
| } | ||||
| 
 | ||||
| /* Graphic-character test for the builtin provider; locale is not consulted. */ | ||||
| static bool | ||||
| wc_isgraph_builtin(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	bool		is_graph = pg_u_isgraph(wc); | ||||
| 
 | ||||
| 	return is_graph; | ||||
| } | ||||
| 
 | ||||
| /* Printable-character test for the builtin provider; locale is not consulted. */ | ||||
| static bool | ||||
| wc_isprint_builtin(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	bool		is_print = pg_u_isprint(wc); | ||||
| 
 | ||||
| 	return is_print; | ||||
| } | ||||
| 
 | ||||
| /* Punctuation test for the builtin provider, delegated to pg_u_ispunct(). */ | ||||
| static bool | ||||
| wc_ispunct_builtin(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	/* pg_u_ispunct() takes the inverse of the locale's casemap_full flag */ | ||||
| 	bool		not_full = !locale->info.builtin.casemap_full; | ||||
| 
 | ||||
| 	return pg_u_ispunct(wc, not_full); | ||||
| } | ||||
| 
 | ||||
| /* Whitespace test for the builtin provider; the locale is not consulted. */ | ||||
| static bool | ||||
| wc_isspace_builtin(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	bool		is_space = pg_u_isspace(wc); | ||||
| 
 | ||||
| 	return is_space; | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Report a char as cased when its high bit is set or when it is an ASCII | ||||
|  * letter.  The locale argument is not consulted. | ||||
|  */ | ||||
| static bool | ||||
| char_is_cased_builtin(char ch, pg_locale_t locale) | ||||
| { | ||||
| 	if (IS_HIGHBIT_SET(ch)) | ||||
| 		return true; | ||||
| 	if (ch >= 'A' && ch <= 'Z') | ||||
| 		return true; | ||||
| 
 | ||||
| 	return (ch >= 'a' && ch <= 'z'); | ||||
| } | ||||
| 
 | ||||
| /* Uppercase one character with unicode_uppercase_simple(). */ | ||||
| static pg_wchar | ||||
| wc_toupper_builtin(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	pg_wchar	upper = unicode_uppercase_simple(wc); | ||||
| 
 | ||||
| 	return upper; | ||||
| } | ||||
| 
 | ||||
| /* Lowercase one character with unicode_lowercase_simple(). */ | ||||
| static pg_wchar | ||||
| wc_tolower_builtin(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	pg_wchar	lower = unicode_lowercase_simple(wc); | ||||
| 
 | ||||
| 	return lower; | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * ctype method table for the builtin provider.  char_tolower and max_chr | ||||
|  * are not listed and therefore stay zero. | ||||
|  */ | ||||
| static const struct ctype_methods ctype_methods_builtin = { | ||||
| 	/* whole-string case mapping */ | ||||
| 	.strlower = strlower_builtin, | ||||
| 	.strupper = strupper_builtin, | ||||
| 	.strtitle = strtitle_builtin, | ||||
| 	.strfold = strfold_builtin, | ||||
| 
 | ||||
| 	/* character classification */ | ||||
| 	.wc_isalpha = wc_isalpha_builtin, | ||||
| 	.wc_isdigit = wc_isdigit_builtin, | ||||
| 	.wc_isalnum = wc_isalnum_builtin, | ||||
| 	.wc_islower = wc_islower_builtin, | ||||
| 	.wc_isupper = wc_isupper_builtin, | ||||
| 	.wc_isprint = wc_isprint_builtin, | ||||
| 	.wc_isgraph = wc_isgraph_builtin, | ||||
| 	.wc_ispunct = wc_ispunct_builtin, | ||||
| 	.wc_isspace = wc_isspace_builtin, | ||||
| 
 | ||||
| 	/* single-character case handling */ | ||||
| 	.wc_toupper = wc_toupper_builtin, | ||||
| 	.wc_tolower = wc_tolower_builtin, | ||||
| 	.char_is_cased = char_is_cased_builtin, | ||||
| }; | ||||
| 
 | ||||
| pg_locale_t | ||||
| create_pg_locale_builtin(Oid collid, MemoryContext context) | ||||
| { | ||||
| @ -161,6 +244,8 @@ create_pg_locale_builtin(Oid collid, MemoryContext context) | ||||
| 	result->deterministic = true; | ||||
| 	result->collate_is_c = true; | ||||
| 	result->ctype_is_c = (strcmp(locstr, "C") == 0); | ||||
| 	if (!result->ctype_is_c) | ||||
| 		result->ctype = &ctype_methods_builtin; | ||||
| 
 | ||||
| 	return result; | ||||
| } | ||||
|  | ||||
| @ -48,19 +48,22 @@ | ||||
| #define		TEXTBUFLEN			1024 | ||||
| 
 | ||||
| extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context); | ||||
| extern size_t strlower_icu(char *dest, size_t destsize, const char *src, | ||||
| 						   ssize_t srclen, pg_locale_t locale); | ||||
| extern size_t strtitle_icu(char *dest, size_t destsize, const char *src, | ||||
| 						   ssize_t srclen, pg_locale_t locale); | ||||
| extern size_t strupper_icu(char *dest, size_t destsize, const char *src, | ||||
| 						   ssize_t srclen, pg_locale_t locale); | ||||
| extern size_t strfold_icu(char *dest, size_t destsize, const char *src, | ||||
| 						  ssize_t srclen, pg_locale_t locale); | ||||
| 
 | ||||
| #ifdef USE_ICU | ||||
| 
 | ||||
| extern UCollator *pg_ucol_open(const char *loc_str); | ||||
| 
 | ||||
| static size_t strlower_icu(char *dest, size_t destsize, const char *src, | ||||
| 						   ssize_t srclen, pg_locale_t locale); | ||||
| static size_t strtitle_icu(char *dest, size_t destsize, const char *src, | ||||
| 						   ssize_t srclen, pg_locale_t locale); | ||||
| static size_t strupper_icu(char *dest, size_t destsize, const char *src, | ||||
| 						   ssize_t srclen, pg_locale_t locale); | ||||
| static size_t strfold_icu(char *dest, size_t destsize, const char *src, | ||||
| 						  ssize_t srclen, pg_locale_t locale); | ||||
| static int	strncoll_icu(const char *arg1, ssize_t len1, | ||||
| 						 const char *arg2, ssize_t len2, | ||||
| 						 pg_locale_t locale); | ||||
| static size_t strnxfrm_icu(char *dest, size_t destsize, | ||||
| 						   const char *src, ssize_t srclen, | ||||
| 						   pg_locale_t locale); | ||||
| @ -118,6 +121,25 @@ static int32_t u_strFoldCase_default(UChar *dest, int32_t destCapacity, | ||||
| 									 const char *locale, | ||||
| 									 UErrorCode *pErrorCode); | ||||
| 
 | ||||
| /* | ||||
|  * Report a char as cased when its high bit is set or when it is an ASCII | ||||
|  * letter.  The locale argument is not consulted. | ||||
|  */ | ||||
| static bool | ||||
| char_is_cased_icu(char ch, pg_locale_t locale) | ||||
| { | ||||
| 	if (IS_HIGHBIT_SET(ch)) | ||||
| 		return true; | ||||
| 	if (ch >= 'A' && ch <= 'Z') | ||||
| 		return true; | ||||
| 
 | ||||
| 	return (ch >= 'a' && ch <= 'z'); | ||||
| } | ||||
| 
 | ||||
| /* Uppercase one character with ICU's u_toupper(); locale is not consulted. */ | ||||
| static pg_wchar | ||||
| toupper_icu(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	pg_wchar	upper = u_toupper(wc); | ||||
| 
 | ||||
| 	return upper; | ||||
| } | ||||
| 
 | ||||
| /* Lowercase one character with ICU's u_tolower(); locale is not consulted. */ | ||||
| static pg_wchar | ||||
| tolower_icu(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	pg_wchar	lower = u_tolower(wc); | ||||
| 
 | ||||
| 	return lower; | ||||
| } | ||||
| 
 | ||||
| static const struct collate_methods collate_methods_icu = { | ||||
| 	.strncoll = strncoll_icu, | ||||
| 	.strnxfrm = strnxfrm_icu, | ||||
| @ -136,6 +158,78 @@ static const struct collate_methods collate_methods_icu_utf8 = { | ||||
| 	.strxfrm_is_safe = true, | ||||
| }; | ||||
| 
 | ||||
| /* Digit test via ICU's u_isdigit(); the locale is not consulted. */ | ||||
| static bool | ||||
| wc_isdigit_icu(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	bool		is_digit = u_isdigit(wc); | ||||
| 
 | ||||
| 	return is_digit; | ||||
| } | ||||
| 
 | ||||
| /* Alphabetic test via ICU's u_isalpha(); the locale is not consulted. */ | ||||
| static bool | ||||
| wc_isalpha_icu(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	bool		is_alpha = u_isalpha(wc); | ||||
| 
 | ||||
| 	return is_alpha; | ||||
| } | ||||
| 
 | ||||
| /* Alphanumeric test via ICU's u_isalnum(); the locale is not consulted. */ | ||||
| static bool | ||||
| wc_isalnum_icu(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	bool		is_alnum = u_isalnum(wc); | ||||
| 
 | ||||
| 	return is_alnum; | ||||
| } | ||||
| 
 | ||||
| /* Uppercase test via ICU's u_isupper(); the locale is not consulted. */ | ||||
| static bool | ||||
| wc_isupper_icu(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	bool		is_upper = u_isupper(wc); | ||||
| 
 | ||||
| 	return is_upper; | ||||
| } | ||||
| 
 | ||||
| /* Lowercase test via ICU's u_islower(); the locale is not consulted. */ | ||||
| static bool | ||||
| wc_islower_icu(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	bool		is_lower = u_islower(wc); | ||||
| 
 | ||||
| 	return is_lower; | ||||
| } | ||||
| 
 | ||||
| /* Graphic-character test via ICU's u_isgraph(); locale is not consulted. */ | ||||
| static bool | ||||
| wc_isgraph_icu(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	bool		is_graph = u_isgraph(wc); | ||||
| 
 | ||||
| 	return is_graph; | ||||
| } | ||||
| 
 | ||||
| /* Printable-character test via ICU's u_isprint(); locale is not consulted. */ | ||||
| static bool | ||||
| wc_isprint_icu(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	bool		is_print = u_isprint(wc); | ||||
| 
 | ||||
| 	return is_print; | ||||
| } | ||||
| 
 | ||||
| /* Punctuation test via ICU's u_ispunct(); the locale is not consulted. */ | ||||
| static bool | ||||
| wc_ispunct_icu(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	bool		is_punct = u_ispunct(wc); | ||||
| 
 | ||||
| 	return is_punct; | ||||
| } | ||||
| 
 | ||||
| /* Whitespace test via ICU's u_isspace(); the locale is not consulted. */ | ||||
| static bool | ||||
| wc_isspace_icu(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	bool		is_space = u_isspace(wc); | ||||
| 
 | ||||
| 	return is_space; | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * ctype method table for the ICU provider.  char_tolower and max_chr are | ||||
|  * not listed and therefore stay zero. | ||||
|  */ | ||||
| static const struct ctype_methods ctype_methods_icu = { | ||||
| 	/* whole-string case mapping */ | ||||
| 	.strlower = strlower_icu, | ||||
| 	.strupper = strupper_icu, | ||||
| 	.strtitle = strtitle_icu, | ||||
| 	.strfold = strfold_icu, | ||||
| 
 | ||||
| 	/* character classification */ | ||||
| 	.wc_isalpha = wc_isalpha_icu, | ||||
| 	.wc_isdigit = wc_isdigit_icu, | ||||
| 	.wc_isalnum = wc_isalnum_icu, | ||||
| 	.wc_islower = wc_islower_icu, | ||||
| 	.wc_isupper = wc_isupper_icu, | ||||
| 	.wc_isprint = wc_isprint_icu, | ||||
| 	.wc_isgraph = wc_isgraph_icu, | ||||
| 	.wc_ispunct = wc_ispunct_icu, | ||||
| 	.wc_isspace = wc_isspace_icu, | ||||
| 
 | ||||
| 	/* single-character case handling */ | ||||
| 	.wc_tolower = tolower_icu, | ||||
| 	.wc_toupper = toupper_icu, | ||||
| 	.char_is_cased = char_is_cased_icu, | ||||
| }; | ||||
| #endif | ||||
| 
 | ||||
| pg_locale_t | ||||
| @ -206,6 +300,7 @@ create_pg_locale_icu(Oid collid, MemoryContext context) | ||||
| 		result->collate = &collate_methods_icu_utf8; | ||||
| 	else | ||||
| 		result->collate = &collate_methods_icu; | ||||
| 	result->ctype = &ctype_methods_icu; | ||||
| 
 | ||||
| 	return result; | ||||
| #else | ||||
| @ -379,7 +474,7 @@ make_icu_collator(const char *iculocstr, const char *icurules) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| size_t | ||||
| static size_t | ||||
| strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, | ||||
| 			 pg_locale_t locale) | ||||
| { | ||||
| @ -399,7 +494,7 @@ strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, | ||||
| 	return result_len; | ||||
| } | ||||
| 
 | ||||
| size_t | ||||
| static size_t | ||||
| strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, | ||||
| 			 pg_locale_t locale) | ||||
| { | ||||
| @ -419,7 +514,7 @@ strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, | ||||
| 	return result_len; | ||||
| } | ||||
| 
 | ||||
| size_t | ||||
| static size_t | ||||
| strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, | ||||
| 			 pg_locale_t locale) | ||||
| { | ||||
| @ -439,7 +534,7 @@ strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, | ||||
| 	return result_len; | ||||
| } | ||||
| 
 | ||||
| size_t | ||||
| static size_t | ||||
| strfold_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, | ||||
| 			pg_locale_t locale) | ||||
| { | ||||
|  | ||||
| @ -33,6 +33,46 @@ | ||||
| #include <shlwapi.h> | ||||
| #endif | ||||
| 
 | ||||
| /*
 | ||||
|  * For the libc provider, to provide as much functionality as possible on a | ||||
|  * variety of platforms without going so far as to implement everything from | ||||
|  * scratch, we use several implementation strategies depending on the | ||||
|  * situation: | ||||
|  * | ||||
|  * 1. In C/POSIX collations, we use hard-wired code.  We can't depend on | ||||
|  * the <ctype.h> functions since those will obey LC_CTYPE.  Note that these | ||||
|  * collations don't give a fig about multibyte characters. | ||||
|  * | ||||
|  * 2. When working in UTF8 encoding, we use the <wctype.h> functions. | ||||
|  * This assumes that every platform uses Unicode codepoints directly | ||||
|  * as the wchar_t representation of Unicode.  (XXX: ICU makes this assumption | ||||
|  * even for non-UTF8 encodings, which may be a problem.)  On some platforms | ||||
|  * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF. | ||||
|  * | ||||
|  * 3. In all other encodings, we use the <ctype.h> functions for pg_wchar | ||||
|  * values up to 255, and punt for values above that.  This is 100% correct | ||||
|  * only in single-byte encodings such as LATINn.  However, non-Unicode | ||||
|  * multibyte encodings are mostly Far Eastern character sets for which the | ||||
|  * properties being tested here aren't very relevant for higher code values | ||||
|  * anyway.  The difficulty with using the <wctype.h> functions with | ||||
|  * non-Unicode multibyte encodings is that we can have no certainty that | ||||
|  * the platform's wchar_t representation matches what we do in pg_wchar | ||||
|  * conversions. | ||||
|  * | ||||
|  * As a special case, in the "default" collation, (2) and (3) force ASCII | ||||
|  * letters to follow ASCII upcase/downcase rules, while in a non-default | ||||
|  * collation we just let the library functions do what they will.  The case | ||||
|  * where this matters is treatment of I/i in Turkish, and the behavior is | ||||
|  * meant to match the upper()/lower() SQL functions. | ||||
|  * | ||||
|  * We store the active collation setting in static variables.  In principle | ||||
|  * it could be passed down to here via the regex library's "struct vars" data | ||||
|  * structure; but that would require somewhat invasive changes in the regex | ||||
|  * library, and right now there's no real benefit to be gained from that. | ||||
|  * | ||||
|  * NB: the coding here assumes pg_wchar is an unsigned type. | ||||
|  */ | ||||
| 
 | ||||
| /*
 | ||||
|  * Size of stack buffer to use for string transformations, used to avoid heap | ||||
|  * allocations in typical cases. This should be large enough that most strings | ||||
| @ -43,13 +83,6 @@ | ||||
| 
 | ||||
| extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context); | ||||
| 
 | ||||
| extern size_t strlower_libc(char *dst, size_t dstsize, const char *src, | ||||
| 							ssize_t srclen, pg_locale_t locale); | ||||
| extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src, | ||||
| 							ssize_t srclen, pg_locale_t locale); | ||||
| extern size_t strupper_libc(char *dst, size_t dstsize, const char *src, | ||||
| 							ssize_t srclen, pg_locale_t locale); | ||||
| 
 | ||||
| static int	strncoll_libc(const char *arg1, ssize_t len1, | ||||
| 						  const char *arg2, ssize_t len2, | ||||
| 						  pg_locale_t locale); | ||||
| @ -85,6 +118,251 @@ static size_t strupper_libc_mb(char *dest, size_t destsize, | ||||
| 							   const char *src, ssize_t srclen, | ||||
| 							   pg_locale_t locale); | ||||
| 
 | ||||
| /* | ||||
|  * Digit test using libc's single-byte isdigit_l(). | ||||
|  * | ||||
|  * Values above UCHAR_MAX are reported as non-digits rather than having | ||||
|  * their truncated low byte classified, matching toupper_libc_sb(). | ||||
|  */ | ||||
| static bool | ||||
| wc_isdigit_libc_sb(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	if (wc > (pg_wchar) UCHAR_MAX) | ||||
| 		return false; | ||||
| 
 | ||||
| 	return isdigit_l((unsigned char) wc, locale->info.lt) != 0; | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Alphabetic test using libc's single-byte isalpha_l(). | ||||
|  * | ||||
|  * Values above UCHAR_MAX are reported as non-alphabetic rather than having | ||||
|  * their truncated low byte classified, matching toupper_libc_sb(). | ||||
|  */ | ||||
| static bool | ||||
| wc_isalpha_libc_sb(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	if (wc > (pg_wchar) UCHAR_MAX) | ||||
| 		return false; | ||||
| 
 | ||||
| 	return isalpha_l((unsigned char) wc, locale->info.lt) != 0; | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Alphanumeric test using libc's single-byte isalnum_l(). | ||||
|  * | ||||
|  * Values above UCHAR_MAX are reported as non-alphanumeric rather than | ||||
|  * having their truncated low byte classified, matching toupper_libc_sb(). | ||||
|  */ | ||||
| static bool | ||||
| wc_isalnum_libc_sb(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	if (wc > (pg_wchar) UCHAR_MAX) | ||||
| 		return false; | ||||
| 
 | ||||
| 	return isalnum_l((unsigned char) wc, locale->info.lt) != 0; | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Uppercase test using libc's single-byte isupper_l(). | ||||
|  * | ||||
|  * Values above UCHAR_MAX are reported as not uppercase rather than having | ||||
|  * their truncated low byte classified, matching toupper_libc_sb(). | ||||
|  */ | ||||
| static bool | ||||
| wc_isupper_libc_sb(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	if (wc > (pg_wchar) UCHAR_MAX) | ||||
| 		return false; | ||||
| 
 | ||||
| 	return isupper_l((unsigned char) wc, locale->info.lt) != 0; | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Lowercase test using libc's single-byte islower_l(). | ||||
|  * | ||||
|  * Values above UCHAR_MAX are reported as not lowercase rather than having | ||||
|  * their truncated low byte classified, matching tolower_libc_sb(). | ||||
|  */ | ||||
| static bool | ||||
| wc_islower_libc_sb(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	if (wc > (pg_wchar) UCHAR_MAX) | ||||
| 		return false; | ||||
| 
 | ||||
| 	return islower_l((unsigned char) wc, locale->info.lt) != 0; | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Graphic-character test using libc's single-byte isgraph_l(). | ||||
|  * | ||||
|  * Values above UCHAR_MAX are reported as non-graphic rather than having | ||||
|  * their truncated low byte classified, matching toupper_libc_sb(). | ||||
|  */ | ||||
| static bool | ||||
| wc_isgraph_libc_sb(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	if (wc > (pg_wchar) UCHAR_MAX) | ||||
| 		return false; | ||||
| 
 | ||||
| 	return isgraph_l((unsigned char) wc, locale->info.lt) != 0; | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Printable-character test using libc's single-byte isprint_l(). | ||||
|  * | ||||
|  * Values above UCHAR_MAX are reported as non-printable rather than having | ||||
|  * their truncated low byte classified, matching toupper_libc_sb(). | ||||
|  */ | ||||
| static bool | ||||
| wc_isprint_libc_sb(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	if (wc > (pg_wchar) UCHAR_MAX) | ||||
| 		return false; | ||||
| 
 | ||||
| 	return isprint_l((unsigned char) wc, locale->info.lt) != 0; | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Punctuation test using libc's single-byte ispunct_l(). | ||||
|  * | ||||
|  * Values above UCHAR_MAX are reported as non-punctuation rather than having | ||||
|  * their truncated low byte classified, matching toupper_libc_sb(). | ||||
|  */ | ||||
| static bool | ||||
| wc_ispunct_libc_sb(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	if (wc > (pg_wchar) UCHAR_MAX) | ||||
| 		return false; | ||||
| 
 | ||||
| 	return ispunct_l((unsigned char) wc, locale->info.lt) != 0; | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Whitespace test using libc's single-byte isspace_l(). | ||||
|  * | ||||
|  * Values above UCHAR_MAX are reported as non-space rather than having | ||||
|  * their truncated low byte classified, matching toupper_libc_sb(). | ||||
|  */ | ||||
| static bool | ||||
| wc_isspace_libc_sb(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	if (wc > (pg_wchar) UCHAR_MAX) | ||||
| 		return false; | ||||
| 
 | ||||
| 	return isspace_l((unsigned char) wc, locale->info.lt) != 0; | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Digit test using libc's wide-character iswdigit_l(). | ||||
|  * | ||||
|  * Where wchar_t is narrower than 4 bytes, code points above 0xFFFF are | ||||
|  * reported as non-digits instead of being truncated, matching | ||||
|  * toupper_libc_mb(). | ||||
|  */ | ||||
| static bool | ||||
| wc_isdigit_libc_mb(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	if (sizeof(wchar_t) < 4 && wc > (pg_wchar) 0xFFFF) | ||||
| 		return false; | ||||
| 
 | ||||
| 	return iswdigit_l((wint_t) wc, locale->info.lt) != 0; | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Alphabetic test using libc's wide-character iswalpha_l(). | ||||
|  * | ||||
|  * Where wchar_t is narrower than 4 bytes, code points above 0xFFFF are | ||||
|  * reported as non-alphabetic instead of being truncated, matching | ||||
|  * toupper_libc_mb(). | ||||
|  */ | ||||
| static bool | ||||
| wc_isalpha_libc_mb(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	if (sizeof(wchar_t) < 4 && wc > (pg_wchar) 0xFFFF) | ||||
| 		return false; | ||||
| 
 | ||||
| 	return iswalpha_l((wint_t) wc, locale->info.lt) != 0; | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Alphanumeric test using libc's wide-character iswalnum_l(). | ||||
|  * | ||||
|  * Where wchar_t is narrower than 4 bytes, code points above 0xFFFF are | ||||
|  * reported as non-alphanumeric instead of being truncated, matching | ||||
|  * toupper_libc_mb(). | ||||
|  */ | ||||
| static bool | ||||
| wc_isalnum_libc_mb(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	if (sizeof(wchar_t) < 4 && wc > (pg_wchar) 0xFFFF) | ||||
| 		return false; | ||||
| 
 | ||||
| 	return iswalnum_l((wint_t) wc, locale->info.lt) != 0; | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Uppercase test using libc's wide-character iswupper_l(). | ||||
|  * | ||||
|  * Where wchar_t is narrower than 4 bytes, code points above 0xFFFF are | ||||
|  * reported as not uppercase instead of being truncated, matching | ||||
|  * toupper_libc_mb(). | ||||
|  */ | ||||
| static bool | ||||
| wc_isupper_libc_mb(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	if (sizeof(wchar_t) < 4 && wc > (pg_wchar) 0xFFFF) | ||||
| 		return false; | ||||
| 
 | ||||
| 	return iswupper_l((wint_t) wc, locale->info.lt) != 0; | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Lowercase test using libc's wide-character iswlower_l(). | ||||
|  * | ||||
|  * Where wchar_t is narrower than 4 bytes, code points above 0xFFFF are | ||||
|  * reported as not lowercase instead of being truncated, matching | ||||
|  * tolower_libc_mb(). | ||||
|  */ | ||||
| static bool | ||||
| wc_islower_libc_mb(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	if (sizeof(wchar_t) < 4 && wc > (pg_wchar) 0xFFFF) | ||||
| 		return false; | ||||
| 
 | ||||
| 	return iswlower_l((wint_t) wc, locale->info.lt) != 0; | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Graphic-character test using libc's wide-character iswgraph_l(). | ||||
|  * | ||||
|  * Where wchar_t is narrower than 4 bytes, code points above 0xFFFF are | ||||
|  * reported as non-graphic instead of being truncated, matching | ||||
|  * toupper_libc_mb(). | ||||
|  */ | ||||
| static bool | ||||
| wc_isgraph_libc_mb(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	if (sizeof(wchar_t) < 4 && wc > (pg_wchar) 0xFFFF) | ||||
| 		return false; | ||||
| 
 | ||||
| 	return iswgraph_l((wint_t) wc, locale->info.lt) != 0; | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Printable-character test using libc's wide-character iswprint_l(). | ||||
|  * | ||||
|  * Where wchar_t is narrower than 4 bytes, code points above 0xFFFF are | ||||
|  * reported as non-printable instead of being truncated, matching | ||||
|  * toupper_libc_mb(). | ||||
|  */ | ||||
| static bool | ||||
| wc_isprint_libc_mb(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	if (sizeof(wchar_t) < 4 && wc > (pg_wchar) 0xFFFF) | ||||
| 		return false; | ||||
| 
 | ||||
| 	return iswprint_l((wint_t) wc, locale->info.lt) != 0; | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Punctuation test using libc's wide-character iswpunct_l(). | ||||
|  * | ||||
|  * Where wchar_t is narrower than 4 bytes, code points above 0xFFFF are | ||||
|  * reported as non-punctuation instead of being truncated, matching | ||||
|  * toupper_libc_mb(). | ||||
|  */ | ||||
| static bool | ||||
| wc_ispunct_libc_mb(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	if (sizeof(wchar_t) < 4 && wc > (pg_wchar) 0xFFFF) | ||||
| 		return false; | ||||
| 
 | ||||
| 	return iswpunct_l((wint_t) wc, locale->info.lt) != 0; | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Whitespace test using libc's wide-character iswspace_l(). | ||||
|  * | ||||
|  * Where wchar_t is narrower than 4 bytes, code points above 0xFFFF are | ||||
|  * reported as non-space instead of being truncated, matching | ||||
|  * toupper_libc_mb(). | ||||
|  */ | ||||
| static bool | ||||
| wc_isspace_libc_mb(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	if (sizeof(wchar_t) < 4 && wc > (pg_wchar) 0xFFFF) | ||||
| 		return false; | ||||
| 
 | ||||
| 	return iswspace_l((wint_t) wc, locale->info.lt) != 0; | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Lowercase a single char with libc's tolower_l().  Asserts that the | ||||
|  * database encoding is single-byte. | ||||
|  */ | ||||
| static char | ||||
| char_tolower_libc(unsigned char ch, pg_locale_t locale) | ||||
| { | ||||
| 	int			lowered; | ||||
| 
 | ||||
| 	Assert(pg_database_encoding_max_length() == 1); | ||||
| 
 | ||||
| 	lowered = tolower_l(ch, locale->info.lt); | ||||
| 	return lowered; | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * In a multibyte encoding every high-bit byte is reported as cased; | ||||
|  * otherwise the answer comes from libc's isalpha_l(). | ||||
|  */ | ||||
| static bool | ||||
| char_is_cased_libc(char ch, pg_locale_t locale) | ||||
| { | ||||
| 	if (pg_database_encoding_max_length() > 1 && IS_HIGHBIT_SET(ch)) | ||||
| 		return true; | ||||
| 
 | ||||
| 	return isalpha_l((unsigned char) ch, locale->info.lt); | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Uppercase one character with libc's single-byte toupper_l().  Asserts | ||||
|  * that the database encoding is not UTF8. | ||||
|  */ | ||||
| static pg_wchar | ||||
| toupper_libc_sb(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	Assert(GetDatabaseEncoding() != PG_UTF8); | ||||
| 
 | ||||
| 	/* the default collation maps ASCII with C rules */ | ||||
| 	if (locale->is_default && wc <= (pg_wchar) 127) | ||||
| 		return pg_ascii_toupper((unsigned char) wc); | ||||
| 
 | ||||
| 	/* values outside the unsigned char range are returned unchanged */ | ||||
| 	if (wc > (pg_wchar) UCHAR_MAX) | ||||
| 		return wc; | ||||
| 
 | ||||
| 	return toupper_l((unsigned char) wc, locale->info.lt); | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Uppercase one character with libc's wide-character towupper_l().  Asserts | ||||
|  * that the database encoding is UTF8. | ||||
|  */ | ||||
| static pg_wchar | ||||
| toupper_libc_mb(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	Assert(GetDatabaseEncoding() == PG_UTF8); | ||||
| 
 | ||||
| 	/* the default collation maps ASCII with C rules */ | ||||
| 	if (locale->is_default && wc <= (pg_wchar) 127) | ||||
| 		return pg_ascii_toupper((unsigned char) wc); | ||||
| 
 | ||||
| 	/* with a narrow wchar_t, code points above 0xFFFF pass through */ | ||||
| 	if (sizeof(wchar_t) < 4 && wc > (pg_wchar) 0xFFFF) | ||||
| 		return wc; | ||||
| 
 | ||||
| 	return towupper_l((wint_t) wc, locale->info.lt); | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Lowercase one character with libc's single-byte tolower_l().  Asserts | ||||
|  * that the database encoding is not UTF8. | ||||
|  */ | ||||
| static pg_wchar | ||||
| tolower_libc_sb(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	Assert(GetDatabaseEncoding() != PG_UTF8); | ||||
| 
 | ||||
| 	/* the default collation maps ASCII with C rules */ | ||||
| 	if (locale->is_default && wc <= (pg_wchar) 127) | ||||
| 		return pg_ascii_tolower((unsigned char) wc); | ||||
| 
 | ||||
| 	/* values outside the unsigned char range are returned unchanged */ | ||||
| 	if (wc > (pg_wchar) UCHAR_MAX) | ||||
| 		return wc; | ||||
| 
 | ||||
| 	return tolower_l((unsigned char) wc, locale->info.lt); | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Lowercase one character with libc's wide-character towlower_l().  Asserts | ||||
|  * that the database encoding is UTF8. | ||||
|  */ | ||||
| static pg_wchar | ||||
| tolower_libc_mb(pg_wchar wc, pg_locale_t locale) | ||||
| { | ||||
| 	Assert(GetDatabaseEncoding() == PG_UTF8); | ||||
| 
 | ||||
| 	/* the default collation maps ASCII with C rules */ | ||||
| 	if (locale->is_default && wc <= (pg_wchar) 127) | ||||
| 		return pg_ascii_tolower((unsigned char) wc); | ||||
| 
 | ||||
| 	/* with a narrow wchar_t, code points above 0xFFFF pass through */ | ||||
| 	if (sizeof(wchar_t) < 4 && wc > (pg_wchar) 0xFFFF) | ||||
| 		return wc; | ||||
| 
 | ||||
| 	return towlower_l((wint_t) wc, locale->info.lt); | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * ctype method table built entirely from the single-byte libc routines. | ||||
|  * strfold is not listed and therefore stays NULL. | ||||
|  */ | ||||
| static const struct ctype_methods ctype_methods_libc_sb = { | ||||
| 	/* whole-string case mapping */ | ||||
| 	.strlower = strlower_libc_sb, | ||||
| 	.strupper = strupper_libc_sb, | ||||
| 	.strtitle = strtitle_libc_sb, | ||||
| 
 | ||||
| 	/* character classification */ | ||||
| 	.wc_isalpha = wc_isalpha_libc_sb, | ||||
| 	.wc_isdigit = wc_isdigit_libc_sb, | ||||
| 	.wc_isalnum = wc_isalnum_libc_sb, | ||||
| 	.wc_islower = wc_islower_libc_sb, | ||||
| 	.wc_isupper = wc_isupper_libc_sb, | ||||
| 	.wc_isprint = wc_isprint_libc_sb, | ||||
| 	.wc_isgraph = wc_isgraph_libc_sb, | ||||
| 	.wc_ispunct = wc_ispunct_libc_sb, | ||||
| 	.wc_isspace = wc_isspace_libc_sb, | ||||
| 
 | ||||
| 	/* single-character case handling */ | ||||
| 	.wc_tolower = tolower_libc_sb, | ||||
| 	.wc_toupper = toupper_libc_sb, | ||||
| 	.char_tolower = char_tolower_libc, | ||||
| 	.char_is_cased = char_is_cased_libc, | ||||
| 
 | ||||
| 	/* the methods above handle values up to UCHAR_MAX */ | ||||
| 	.max_chr = UCHAR_MAX, | ||||
| }; | ||||
| 
 | ||||
| /*
 | ||||
|  * Non-UTF8 multibyte encodings use multibyte semantics for case mapping, but | ||||
|  * single-byte semantics for pattern matching. | ||||
|  */ | ||||
| static const struct ctype_methods ctype_methods_libc_other_mb = { | ||||
| 	/* whole-string case mapping: multibyte routines */ | ||||
| 	.strlower = strlower_libc_mb, | ||||
| 	.strupper = strupper_libc_mb, | ||||
| 	.strtitle = strtitle_libc_mb, | ||||
| 
 | ||||
| 	/* character classification: single-byte routines */ | ||||
| 	.wc_isalpha = wc_isalpha_libc_sb, | ||||
| 	.wc_isdigit = wc_isdigit_libc_sb, | ||||
| 	.wc_isalnum = wc_isalnum_libc_sb, | ||||
| 	.wc_islower = wc_islower_libc_sb, | ||||
| 	.wc_isupper = wc_isupper_libc_sb, | ||||
| 	.wc_isprint = wc_isprint_libc_sb, | ||||
| 	.wc_isgraph = wc_isgraph_libc_sb, | ||||
| 	.wc_ispunct = wc_ispunct_libc_sb, | ||||
| 	.wc_isspace = wc_isspace_libc_sb, | ||||
| 
 | ||||
| 	/* single-character case handling: single-byte routines */ | ||||
| 	.wc_tolower = tolower_libc_sb, | ||||
| 	.wc_toupper = toupper_libc_sb, | ||||
| 	.char_tolower = char_tolower_libc, | ||||
| 	.char_is_cased = char_is_cased_libc, | ||||
| 
 | ||||
| 	/* the methods above handle values up to UCHAR_MAX */ | ||||
| 	.max_chr = UCHAR_MAX, | ||||
| }; | ||||
| 
 | ||||
| /* | ||||
|  * ctype method table built from the wide-character libc routines.  strfold | ||||
|  * and max_chr are not listed and therefore stay zero. | ||||
|  */ | ||||
| static const struct ctype_methods ctype_methods_libc_utf8 = { | ||||
| 	/* whole-string case mapping */ | ||||
| 	.strlower = strlower_libc_mb, | ||||
| 	.strupper = strupper_libc_mb, | ||||
| 	.strtitle = strtitle_libc_mb, | ||||
| 
 | ||||
| 	/* character classification */ | ||||
| 	.wc_isalpha = wc_isalpha_libc_mb, | ||||
| 	.wc_isdigit = wc_isdigit_libc_mb, | ||||
| 	.wc_isalnum = wc_isalnum_libc_mb, | ||||
| 	.wc_islower = wc_islower_libc_mb, | ||||
| 	.wc_isupper = wc_isupper_libc_mb, | ||||
| 	.wc_isprint = wc_isprint_libc_mb, | ||||
| 	.wc_isgraph = wc_isgraph_libc_mb, | ||||
| 	.wc_ispunct = wc_ispunct_libc_mb, | ||||
| 	.wc_isspace = wc_isspace_libc_mb, | ||||
| 
 | ||||
| 	/* single-character case handling */ | ||||
| 	.wc_tolower = tolower_libc_mb, | ||||
| 	.wc_toupper = toupper_libc_mb, | ||||
| 	.char_tolower = char_tolower_libc, | ||||
| 	.char_is_cased = char_is_cased_libc, | ||||
| }; | ||||
| 
 | ||||
| static const struct collate_methods collate_methods_libc = { | ||||
| 	.strncoll = strncoll_libc, | ||||
| 	.strnxfrm = strnxfrm_libc, | ||||
| @ -119,36 +397,6 @@ static const struct collate_methods collate_methods_libc_win32_utf8 = { | ||||
| }; | ||||
| #endif | ||||
| 
 | ||||
| /* Pick the single-byte or multibyte lowercasing routine by encoding width. */ | ||||
| size_t | ||||
| strlower_libc(char *dst, size_t dstsize, const char *src, | ||||
| 			  ssize_t srclen, pg_locale_t locale) | ||||
| { | ||||
| 	if (pg_database_encoding_max_length() <= 1) | ||||
| 		return strlower_libc_sb(dst, dstsize, src, srclen, locale); | ||||
| 
 | ||||
| 	return strlower_libc_mb(dst, dstsize, src, srclen, locale); | ||||
| } | ||||
| 
 | ||||
| /* Pick the single-byte or multibyte titlecasing routine by encoding width. */ | ||||
| size_t | ||||
| strtitle_libc(char *dst, size_t dstsize, const char *src, | ||||
| 			  ssize_t srclen, pg_locale_t locale) | ||||
| { | ||||
| 	if (pg_database_encoding_max_length() <= 1) | ||||
| 		return strtitle_libc_sb(dst, dstsize, src, srclen, locale); | ||||
| 
 | ||||
| 	return strtitle_libc_mb(dst, dstsize, src, srclen, locale); | ||||
| } | ||||
| 
 | ||||
| /* Pick the single-byte or multibyte uppercasing routine by encoding width. */ | ||||
| size_t | ||||
| strupper_libc(char *dst, size_t dstsize, const char *src, | ||||
| 			  ssize_t srclen, pg_locale_t locale) | ||||
| { | ||||
| 	if (pg_database_encoding_max_length() <= 1) | ||||
| 		return strupper_libc_sb(dst, dstsize, src, srclen, locale); | ||||
| 
 | ||||
| 	return strupper_libc_mb(dst, dstsize, src, srclen, locale); | ||||
| } | ||||
| 
 | ||||
| static size_t | ||||
| strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen, | ||||
| 				 pg_locale_t locale) | ||||
| @ -481,6 +729,15 @@ create_pg_locale_libc(Oid collid, MemoryContext context) | ||||
| #endif | ||||
| 			result->collate = &collate_methods_libc; | ||||
| 	} | ||||
| 	if (!result->ctype_is_c) | ||||
| 	{ | ||||
| 		if (GetDatabaseEncoding() == PG_UTF8) | ||||
| 			result->ctype = &ctype_methods_libc_utf8; | ||||
| 		else if (pg_database_encoding_max_length() > 1) | ||||
| 			result->ctype = &ctype_methods_libc_other_mb; | ||||
| 		else | ||||
| 			result->ctype = &ctype_methods_libc_sb; | ||||
| 	} | ||||
| 
 | ||||
| 	return result; | ||||
| } | ||||
|  | ||||
| @ -12,6 +12,8 @@ | ||||
| #ifndef _PG_LOCALE_ | ||||
| #define _PG_LOCALE_ | ||||
| 
 | ||||
| #include "mb/pg_wchar.h" | ||||
| 
 | ||||
| #ifdef USE_ICU | ||||
| #include <unicode/ucol.h> | ||||
| #endif | ||||
| @ -77,6 +79,52 @@ struct collate_methods | ||||
| 	bool		strxfrm_is_safe; | ||||
| }; | ||||
| 
 | ||||
| struct ctype_methods | ||||
| { | ||||
| 	/* whole-string case mapping: LOWER()/INITCAP()/UPPER(); strfold may be NULL (the libc tables leave it unset) */ | ||||
| 	size_t		(*strlower) (char *dest, size_t destsize, | ||||
| 							 const char *src, ssize_t srclen, | ||||
| 							 pg_locale_t locale); | ||||
| 	size_t		(*strtitle) (char *dest, size_t destsize, | ||||
| 							 const char *src, ssize_t srclen, | ||||
| 							 pg_locale_t locale); | ||||
| 	size_t		(*strupper) (char *dest, size_t destsize, | ||||
| 							 const char *src, ssize_t srclen, | ||||
| 							 pg_locale_t locale); | ||||
| 	size_t		(*strfold) (char *dest, size_t destsize, | ||||
| 							const char *src, ssize_t srclen, | ||||
| 							pg_locale_t locale); | ||||
| 
 | ||||
| 	/* required: per-character classification and single-character case mapping */ | ||||
| 	bool		(*wc_isdigit) (pg_wchar wc, pg_locale_t locale); | ||||
| 	bool		(*wc_isalpha) (pg_wchar wc, pg_locale_t locale); | ||||
| 	bool		(*wc_isalnum) (pg_wchar wc, pg_locale_t locale); | ||||
| 	bool		(*wc_isupper) (pg_wchar wc, pg_locale_t locale); | ||||
| 	bool		(*wc_islower) (pg_wchar wc, pg_locale_t locale); | ||||
| 	bool		(*wc_isgraph) (pg_wchar wc, pg_locale_t locale); | ||||
| 	bool		(*wc_isprint) (pg_wchar wc, pg_locale_t locale); | ||||
| 	bool		(*wc_ispunct) (pg_wchar wc, pg_locale_t locale); | ||||
| 	bool		(*wc_isspace) (pg_wchar wc, pg_locale_t locale); | ||||
| 	pg_wchar	(*wc_toupper) (pg_wchar wc, pg_locale_t locale); | ||||
| 	pg_wchar	(*wc_tolower) (pg_wchar wc, pg_locale_t locale); | ||||
| 
 | ||||
| 	/* required: reports whether the given char may vary by case */ | ||||
| 	bool		(*char_is_cased) (char ch, pg_locale_t locale); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Optional. If defined, will only be called for single-byte encodings. If | ||||
| 	 * not defined, or if the encoding is multibyte, will fall back to | ||||
| 	 * pg_strlower(). | ||||
| 	 */ | ||||
| 	char		(*char_tolower) (unsigned char ch, pg_locale_t locale); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * For regex and pattern matching efficiency, the maximum char value | ||||
| 	 * supported by the above methods. If zero, limit is set by regex code. | ||||
| 	 */ | ||||
| 	pg_wchar	max_chr; | ||||
| }; | ||||
| 
 | ||||
| /*
 | ||||
|  * We use a discriminated union to hold either a locale_t or an ICU collator. | ||||
|  * pg_locale_t is occasionally checked for truth, so make it a pointer. | ||||
| @ -102,6 +150,7 @@ struct pg_locale_struct | ||||
| 	bool		is_default; | ||||
| 
 | ||||
| 	const struct collate_methods *collate;	/* NULL if collate_is_c */ | ||||
| 	const struct ctype_methods *ctype;	/* NULL if ctype_is_c */ | ||||
| 
 | ||||
| 	union | ||||
| 	{ | ||||
| @ -125,6 +174,10 @@ extern void init_database_collation(void); | ||||
| extern pg_locale_t pg_newlocale_from_collation(Oid collid); | ||||
| 
 | ||||
| extern char *get_collation_actual_version(char collprovider, const char *collcollate); | ||||
| 
 | ||||
| extern bool char_is_cased(char ch, pg_locale_t locale); | ||||
| extern bool char_tolower_enabled(pg_locale_t locale); | ||||
| extern char char_tolower(unsigned char ch, pg_locale_t locale); | ||||
| extern size_t pg_strlower(char *dst, size_t dstsize, | ||||
| 						  const char *src, ssize_t srclen, | ||||
| 						  pg_locale_t locale); | ||||
|  | ||||
| @ -1878,7 +1878,6 @@ PGTargetServerType | ||||
| PGTernaryBool | ||||
| PGTransactionStatusType | ||||
| PGVerbosity | ||||
| PG_Locale_Strategy | ||||
| PG_Lock_Status | ||||
| PG_init_t | ||||
| PGauthData | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user