mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-31 00:03:57 -04:00 
			
		
		
		
	Fix char2wchar/wchar2char to support collations properly.
These functions should take a pg_locale_t, not a collation OID, and should call mbstowcs_l/wcstombs_l where available. Where those functions are not available, temporarily select the correct locale with uselocale(). This change removes the bogus assumption that all locales selectable in a given database have the same wide-character conversion method; in particular, the collate.linux.utf8 regression test now passes with LC_CTYPE=C, so long as the database encoding is UTF8. I decided to move the char2wchar/wchar2char functions out of mbutils.c and into pg_locale.c, because they work on wchar_t not pg_wchar and thus don't really belong with the mbutils.c functions. Keeping them where they were would have required importing pg_locale_t into pg_wchar.h somehow, which did not seem like a good plan.
This commit is contained in:
		
							parent
							
								
									bb85030630
								
							
						
					
					
						commit
						2ab0796d7a
					
				
							
								
								
									
										3
									
								
								configure
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								configure
									
									
									
									
										vendored
									
									
								
							| @ -18985,7 +18985,8 @@ fi | |||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| for ac_func in cbrt dlopen fcvt fdatasync getifaddrs getpeereid getpeerucred getrlimit memmove poll pstat readlink scandir setproctitle setsid sigprocmask symlink sysconf towlower utime utimes waitpid wcstombs | 
 | ||||||
|  | for ac_func in cbrt dlopen fcvt fdatasync getifaddrs getpeereid getpeerucred getrlimit memmove poll pstat readlink scandir setproctitle setsid sigprocmask symlink sysconf towlower utime utimes waitpid wcstombs wcstombs_l | ||||||
| do | do | ||||||
| as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` | as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` | ||||||
| { $as_echo "$as_me:$LINENO: checking for $ac_func" >&5 | { $as_echo "$as_me:$LINENO: checking for $ac_func" >&5 | ||||||
|  | |||||||
| @ -1187,7 +1187,7 @@ PGAC_VAR_INT_TIMEZONE | |||||||
| AC_FUNC_ACCEPT_ARGTYPES | AC_FUNC_ACCEPT_ARGTYPES | ||||||
| PGAC_FUNC_GETTIMEOFDAY_1ARG | PGAC_FUNC_GETTIMEOFDAY_1ARG | ||||||
| 
 | 
 | ||||||
| AC_CHECK_FUNCS([cbrt dlopen fcvt fdatasync getifaddrs getpeereid getpeerucred getrlimit memmove poll pstat readlink scandir setproctitle setsid sigprocmask symlink sysconf towlower utime utimes waitpid wcstombs]) | AC_CHECK_FUNCS([cbrt dlopen fcvt fdatasync getifaddrs getpeereid getpeerucred getrlimit memmove poll pstat readlink scandir setproctitle setsid sigprocmask symlink sysconf towlower utime utimes waitpid wcstombs wcstombs_l]) | ||||||
| 
 | 
 | ||||||
| AC_REPLACE_FUNCS(fseeko) | AC_REPLACE_FUNCS(fseeko) | ||||||
| case $host_os in | case $host_os in | ||||||
|  | |||||||
| @ -29,11 +29,12 @@ t_isdigit(const char *ptr) | |||||||
| 	int			clen = pg_mblen(ptr); | 	int			clen = pg_mblen(ptr); | ||||||
| 	wchar_t		character[2]; | 	wchar_t		character[2]; | ||||||
| 	Oid			collation = DEFAULT_COLLATION_OID;		/* TODO */ | 	Oid			collation = DEFAULT_COLLATION_OID;		/* TODO */ | ||||||
|  | 	pg_locale_t	mylocale = 0;	/* TODO */ | ||||||
| 
 | 
 | ||||||
| 	if (clen == 1 || lc_ctype_is_c(collation)) | 	if (clen == 1 || lc_ctype_is_c(collation)) | ||||||
| 		return isdigit(TOUCHAR(ptr)); | 		return isdigit(TOUCHAR(ptr)); | ||||||
| 
 | 
 | ||||||
| 	char2wchar(character, 2, ptr, clen, collation); | 	char2wchar(character, 2, ptr, clen, mylocale); | ||||||
| 
 | 
 | ||||||
| 	return iswdigit((wint_t) character[0]); | 	return iswdigit((wint_t) character[0]); | ||||||
| } | } | ||||||
| @ -44,11 +45,12 @@ t_isspace(const char *ptr) | |||||||
| 	int			clen = pg_mblen(ptr); | 	int			clen = pg_mblen(ptr); | ||||||
| 	wchar_t		character[2]; | 	wchar_t		character[2]; | ||||||
| 	Oid			collation = DEFAULT_COLLATION_OID;		/* TODO */ | 	Oid			collation = DEFAULT_COLLATION_OID;		/* TODO */ | ||||||
|  | 	pg_locale_t	mylocale = 0;	/* TODO */ | ||||||
| 
 | 
 | ||||||
| 	if (clen == 1 || lc_ctype_is_c(collation)) | 	if (clen == 1 || lc_ctype_is_c(collation)) | ||||||
| 		return isspace(TOUCHAR(ptr)); | 		return isspace(TOUCHAR(ptr)); | ||||||
| 
 | 
 | ||||||
| 	char2wchar(character, 2, ptr, clen, collation); | 	char2wchar(character, 2, ptr, clen, mylocale); | ||||||
| 
 | 
 | ||||||
| 	return iswspace((wint_t) character[0]); | 	return iswspace((wint_t) character[0]); | ||||||
| } | } | ||||||
| @ -59,11 +61,12 @@ t_isalpha(const char *ptr) | |||||||
| 	int			clen = pg_mblen(ptr); | 	int			clen = pg_mblen(ptr); | ||||||
| 	wchar_t		character[2]; | 	wchar_t		character[2]; | ||||||
| 	Oid			collation = DEFAULT_COLLATION_OID;		/* TODO */ | 	Oid			collation = DEFAULT_COLLATION_OID;		/* TODO */ | ||||||
|  | 	pg_locale_t	mylocale = 0;	/* TODO */ | ||||||
| 
 | 
 | ||||||
| 	if (clen == 1 || lc_ctype_is_c(collation)) | 	if (clen == 1 || lc_ctype_is_c(collation)) | ||||||
| 		return isalpha(TOUCHAR(ptr)); | 		return isalpha(TOUCHAR(ptr)); | ||||||
| 
 | 
 | ||||||
| 	char2wchar(character, 2, ptr, clen, collation); | 	char2wchar(character, 2, ptr, clen, mylocale); | ||||||
| 
 | 
 | ||||||
| 	return iswalpha((wint_t) character[0]); | 	return iswalpha((wint_t) character[0]); | ||||||
| } | } | ||||||
| @ -74,11 +77,12 @@ t_isprint(const char *ptr) | |||||||
| 	int			clen = pg_mblen(ptr); | 	int			clen = pg_mblen(ptr); | ||||||
| 	wchar_t		character[2]; | 	wchar_t		character[2]; | ||||||
| 	Oid			collation = DEFAULT_COLLATION_OID;		/* TODO */ | 	Oid			collation = DEFAULT_COLLATION_OID;		/* TODO */ | ||||||
|  | 	pg_locale_t	mylocale = 0;	/* TODO */ | ||||||
| 
 | 
 | ||||||
| 	if (clen == 1 || lc_ctype_is_c(collation)) | 	if (clen == 1 || lc_ctype_is_c(collation)) | ||||||
| 		return isprint(TOUCHAR(ptr)); | 		return isprint(TOUCHAR(ptr)); | ||||||
| 
 | 
 | ||||||
| 	char2wchar(character, 2, ptr, clen, collation); | 	char2wchar(character, 2, ptr, clen, mylocale); | ||||||
| 
 | 
 | ||||||
| 	return iswprint((wint_t) character[0]); | 	return iswprint((wint_t) character[0]); | ||||||
| } | } | ||||||
| @ -246,6 +250,7 @@ lowerstr_with_len(const char *str, int len) | |||||||
| 
 | 
 | ||||||
| #ifdef USE_WIDE_UPPER_LOWER | #ifdef USE_WIDE_UPPER_LOWER | ||||||
| 	Oid			collation = DEFAULT_COLLATION_OID;		/* TODO */ | 	Oid			collation = DEFAULT_COLLATION_OID;		/* TODO */ | ||||||
|  | 	pg_locale_t	mylocale = 0;	/* TODO */ | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
| 	if (len == 0) | 	if (len == 0) | ||||||
| @ -272,7 +277,7 @@ lowerstr_with_len(const char *str, int len) | |||||||
| 		 */ | 		 */ | ||||||
| 		wptr = wstr = (wchar_t *) palloc(sizeof(wchar_t) * (len + 1)); | 		wptr = wstr = (wchar_t *) palloc(sizeof(wchar_t) * (len + 1)); | ||||||
| 
 | 
 | ||||||
| 		wlen = char2wchar(wstr, len + 1, str, len, collation); | 		wlen = char2wchar(wstr, len + 1, str, len, mylocale); | ||||||
| 		Assert(wlen <= len); | 		Assert(wlen <= len); | ||||||
| 
 | 
 | ||||||
| 		while (*wptr) | 		while (*wptr) | ||||||
| @ -287,7 +292,7 @@ lowerstr_with_len(const char *str, int len) | |||||||
| 		len = pg_database_encoding_max_length() * wlen + 1; | 		len = pg_database_encoding_max_length() * wlen + 1; | ||||||
| 		out = (char *) palloc(len); | 		out = (char *) palloc(len); | ||||||
| 
 | 
 | ||||||
| 		wlen = wchar2char(out, wstr, len, collation); | 		wlen = wchar2char(out, wstr, len, mylocale); | ||||||
| 
 | 
 | ||||||
| 		pfree(wstr); | 		pfree(wstr); | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -300,13 +300,14 @@ TParserInit(char *str, int len) | |||||||
| 	if (prs->charmaxlen > 1) | 	if (prs->charmaxlen > 1) | ||||||
| 	{ | 	{ | ||||||
| 		Oid			collation = DEFAULT_COLLATION_OID;	/* TODO */ | 		Oid			collation = DEFAULT_COLLATION_OID;	/* TODO */ | ||||||
|  | 		pg_locale_t	mylocale = 0;	/* TODO */ | ||||||
| 
 | 
 | ||||||
| 		prs->usewide = true; | 		prs->usewide = true; | ||||||
| 		if (lc_ctype_is_c(collation)) | 		if (lc_ctype_is_c(collation)) | ||||||
| 		{ | 		{ | ||||||
| 			/*
 | 			/*
 | ||||||
| 			 * char2wchar doesn't work for C-locale and sizeof(pg_wchar) could | 			 * char2wchar doesn't work for C-locale and sizeof(pg_wchar) could | ||||||
| 			 * be not equal to sizeof(wchar_t) | 			 * be different from sizeof(wchar_t) | ||||||
| 			 */ | 			 */ | ||||||
| 			prs->pgwstr = (pg_wchar *) palloc(sizeof(pg_wchar) * (prs->lenstr + 1)); | 			prs->pgwstr = (pg_wchar *) palloc(sizeof(pg_wchar) * (prs->lenstr + 1)); | ||||||
| 			pg_mb2wchar_with_len(prs->str, prs->pgwstr, prs->lenstr); | 			pg_mb2wchar_with_len(prs->str, prs->pgwstr, prs->lenstr); | ||||||
| @ -314,7 +315,8 @@ TParserInit(char *str, int len) | |||||||
| 		else | 		else | ||||||
| 		{ | 		{ | ||||||
| 			prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * (prs->lenstr + 1)); | 			prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * (prs->lenstr + 1)); | ||||||
| 			char2wchar(prs->wstr, prs->lenstr + 1, prs->str, prs->lenstr, collation); | 			char2wchar(prs->wstr, prs->lenstr + 1, prs->str, prs->lenstr, | ||||||
|  | 					   mylocale); | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 	else | 	else | ||||||
|  | |||||||
| @ -1454,6 +1454,10 @@ str_numth(char *dest, char *num, int type) | |||||||
| 	return dest; | 	return dest; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | /*****************************************************************************
 | ||||||
|  |  *			upper/lower/initcap functions | ||||||
|  |  *****************************************************************************/ | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * If the system provides the needed functions for wide-character manipulation |  * If the system provides the needed functions for wide-character manipulation | ||||||
|  * (which are all standardized by C99), then we implement upper/lower/initcap |  * (which are all standardized by C99), then we implement upper/lower/initcap | ||||||
| @ -1527,7 +1531,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) | |||||||
| 		/* Output workspace cannot have more codes than input bytes */ | 		/* Output workspace cannot have more codes than input bytes */ | ||||||
| 		workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); | 		workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); | ||||||
| 
 | 
 | ||||||
| 		char2wchar(workspace, nbytes + 1, buff, nbytes, collid); | 		char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); | ||||||
| 
 | 
 | ||||||
| 		for (curr_char = 0; workspace[curr_char] != 0; curr_char++) | 		for (curr_char = 0; workspace[curr_char] != 0; curr_char++) | ||||||
| 		{ | 		{ | ||||||
| @ -1543,7 +1547,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) | |||||||
| 		result_size = curr_char * pg_database_encoding_max_length() + 1; | 		result_size = curr_char * pg_database_encoding_max_length() + 1; | ||||||
| 		result = palloc(result_size); | 		result = palloc(result_size); | ||||||
| 
 | 
 | ||||||
| 		wchar2char(result, workspace, result_size, collid); | 		wchar2char(result, workspace, result_size, mylocale); | ||||||
| 		pfree(workspace); | 		pfree(workspace); | ||||||
| 	} | 	} | ||||||
| #endif   /* USE_WIDE_UPPER_LOWER */ | #endif   /* USE_WIDE_UPPER_LOWER */ | ||||||
| @ -1648,7 +1652,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) | |||||||
| 		/* Output workspace cannot have more codes than input bytes */ | 		/* Output workspace cannot have more codes than input bytes */ | ||||||
| 		workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); | 		workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); | ||||||
| 
 | 
 | ||||||
| 		char2wchar(workspace, nbytes + 1, buff, nbytes, collid); | 		char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); | ||||||
| 
 | 
 | ||||||
| 		for (curr_char = 0; workspace[curr_char] != 0; curr_char++) | 		for (curr_char = 0; workspace[curr_char] != 0; curr_char++) | ||||||
| 		{ | 		{ | ||||||
| @ -1664,7 +1668,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) | |||||||
| 		result_size = curr_char * pg_database_encoding_max_length() + 1; | 		result_size = curr_char * pg_database_encoding_max_length() + 1; | ||||||
| 		result = palloc(result_size); | 		result = palloc(result_size); | ||||||
| 
 | 
 | ||||||
| 		wchar2char(result, workspace, result_size, collid); | 		wchar2char(result, workspace, result_size, mylocale); | ||||||
| 		pfree(workspace); | 		pfree(workspace); | ||||||
| 	} | 	} | ||||||
| #endif   /* USE_WIDE_UPPER_LOWER */ | #endif   /* USE_WIDE_UPPER_LOWER */ | ||||||
| @ -1781,7 +1785,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) | |||||||
| 		/* Output workspace cannot have more codes than input bytes */ | 		/* Output workspace cannot have more codes than input bytes */ | ||||||
| 		workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); | 		workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); | ||||||
| 
 | 
 | ||||||
| 		char2wchar(workspace, nbytes + 1, buff, nbytes, collid); | 		char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); | ||||||
| 
 | 
 | ||||||
| 		for (curr_char = 0; workspace[curr_char] != 0; curr_char++) | 		for (curr_char = 0; workspace[curr_char] != 0; curr_char++) | ||||||
| 		{ | 		{ | ||||||
| @ -1809,7 +1813,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) | |||||||
| 		result_size = curr_char * pg_database_encoding_max_length() + 1; | 		result_size = curr_char * pg_database_encoding_max_length() + 1; | ||||||
| 		result = palloc(result_size); | 		result = palloc(result_size); | ||||||
| 
 | 
 | ||||||
| 		wchar2char(result, workspace, result_size, collid); | 		wchar2char(result, workspace, result_size, mylocale); | ||||||
| 		pfree(workspace); | 		pfree(workspace); | ||||||
| 	} | 	} | ||||||
| #endif   /* USE_WIDE_UPPER_LOWER */ | #endif   /* USE_WIDE_UPPER_LOWER */ | ||||||
|  | |||||||
| @ -1030,3 +1030,176 @@ pg_newlocale_from_collation(Oid collid) | |||||||
| 
 | 
 | ||||||
| 	return cache_entry->locale; | 	return cache_entry->locale; | ||||||
| } | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * These functions convert from/to libc's wchar_t, *not* pg_wchar. | ||||||
|  |  * Therefore we keep them here rather than with the mbutils code. | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | #ifdef USE_WIDE_UPPER_LOWER | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * wchar2char --- convert wide characters to multibyte format | ||||||
|  |  * | ||||||
|  |  * This has the same API as wcstombs_l() (where that exists); in particular, | ||||||
|  |  * tolen is the maximum number of bytes to store at *to, and *from must be | ||||||
|  |  * zero-terminated.  The output will be zero-terminated iff there is room. | ||||||
|  |  */ | ||||||
|  | size_t | ||||||
|  | wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale) | ||||||
|  | { | ||||||
|  | 	size_t		result; | ||||||
|  | 
 | ||||||
|  | 	if (tolen == 0) | ||||||
|  | 		return 0; | ||||||
|  | 
 | ||||||
|  | #ifdef WIN32 | ||||||
|  | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and | ||||||
|  | 	 * for some reason mbstowcs and wcstombs won't do this for us, so we use | ||||||
|  | 	 * WideCharToMultiByte() here and MultiByteToWideChar() in char2wchar(). | ||||||
|  | 	 */ | ||||||
|  | 	if (GetDatabaseEncoding() == PG_UTF8) | ||||||
|  | 	{ | ||||||
|  | 		result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen, | ||||||
|  | 									 NULL, NULL); | ||||||
|  | 		/* A zero return is failure */ | ||||||
|  | 		if (result <= 0) | ||||||
|  | 			result = -1; | ||||||
|  | 		else | ||||||
|  | 		{ | ||||||
|  | 			Assert(result <= tolen); | ||||||
|  | 			/* Microsoft counts the zero terminator in the result */ | ||||||
|  | 			result--; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	else | ||||||
|  | #endif   /* WIN32 */ | ||||||
|  | 	if (locale == (pg_locale_t) 0) | ||||||
|  | 	{ | ||||||
|  | 		/* Use wcstombs directly for the default locale */ | ||||||
|  | 		result = wcstombs(to, from, tolen); | ||||||
|  | 	} | ||||||
|  | 	else | ||||||
|  | 	{ | ||||||
|  | #ifdef HAVE_LOCALE_T | ||||||
|  | #ifdef HAVE_WCSTOMBS_L | ||||||
|  | 		/* Use wcstombs_l for nondefault locales */ | ||||||
|  | 		result = wcstombs_l(to, from, tolen, locale); | ||||||
|  | #else /* !HAVE_WCSTOMBS_L */ | ||||||
|  | 		/* We have to temporarily set the locale as current ... ugh */ | ||||||
|  | 		locale_t	save_locale = uselocale(locale); | ||||||
|  | 
 | ||||||
|  | 		result = wcstombs(to, from, tolen); | ||||||
|  | 
 | ||||||
|  | 		uselocale(save_locale); | ||||||
|  | #endif /* HAVE_WCSTOMBS_L */ | ||||||
|  | #else /* !HAVE_LOCALE_T */ | ||||||
|  | 		/* Can't have locale != 0 without HAVE_LOCALE_T */ | ||||||
|  | 		elog(ERROR, "wcstombs_l is not available"); | ||||||
|  | 		result = 0;				/* keep compiler quiet */ | ||||||
|  | #endif /* HAVE_LOCALE_T */ | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return result; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * char2wchar --- convert multibyte characters to wide characters | ||||||
|  |  * | ||||||
|  |  * This has almost the API of mbstowcs_l(), except that *from need not be | ||||||
|  |  * null-terminated; instead, the number of input bytes is specified as | ||||||
|  |  * fromlen.  Also, we ereport() rather than returning -1 for invalid | ||||||
|  |  * input encoding.	tolen is the maximum number of wchar_t's to store at *to. | ||||||
|  |  * The output will be zero-terminated iff there is room. | ||||||
|  |  */ | ||||||
|  | size_t | ||||||
|  | char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, | ||||||
|  | 		   pg_locale_t locale) | ||||||
|  | { | ||||||
|  | 	size_t		result; | ||||||
|  | 
 | ||||||
|  | 	if (tolen == 0) | ||||||
|  | 		return 0; | ||||||
|  | 
 | ||||||
|  | #ifdef WIN32 | ||||||
|  | 	/* See WIN32 "Unicode" comment above */ | ||||||
|  | 	if (GetDatabaseEncoding() == PG_UTF8) | ||||||
|  | 	{ | ||||||
|  | 		/* Win32 API does not work for zero-length input */ | ||||||
|  | 		if (fromlen == 0) | ||||||
|  | 			result = 0; | ||||||
|  | 		else | ||||||
|  | 		{ | ||||||
|  | 			result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1); | ||||||
|  | 			/* A zero return is failure */ | ||||||
|  | 			if (result == 0) | ||||||
|  | 				result = -1; | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		if (result != -1) | ||||||
|  | 		{ | ||||||
|  | 			Assert(result < tolen); | ||||||
|  | 			/* Append trailing null wchar (MultiByteToWideChar() does not) */ | ||||||
|  | 			to[result] = 0; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	else | ||||||
|  | #endif   /* WIN32 */ | ||||||
|  | 	{ | ||||||
|  | 		/* mbstowcs requires ending '\0' */ | ||||||
|  | 		char	   *str = pnstrdup(from, fromlen); | ||||||
|  | 
 | ||||||
|  | 		if (locale == (pg_locale_t) 0) | ||||||
|  | 		{ | ||||||
|  | 			/* Use mbstowcs directly for the default locale */ | ||||||
|  | 			result = mbstowcs(to, str, tolen); | ||||||
|  | 		} | ||||||
|  | 		else | ||||||
|  | 		{ | ||||||
|  | #ifdef HAVE_LOCALE_T | ||||||
|  | #ifdef HAVE_WCSTOMBS_L | ||||||
|  | 			/* Use mbstowcs_l for nondefault locales; it is assumed to exist wherever wcstombs_l does, since only the latter is probed */ | ||||||
|  | 			result = mbstowcs_l(to, str, tolen, locale); | ||||||
|  | #else /* !HAVE_WCSTOMBS_L */ | ||||||
|  | 			/* We have to temporarily set the locale as current ... ugh */ | ||||||
|  | 			locale_t	save_locale = uselocale(locale); | ||||||
|  | 
 | ||||||
|  | 			result = mbstowcs(to, str, tolen); | ||||||
|  | 
 | ||||||
|  | 			uselocale(save_locale); | ||||||
|  | #endif /* HAVE_WCSTOMBS_L */ | ||||||
|  | #else /* !HAVE_LOCALE_T */ | ||||||
|  | 			/* Can't have locale != 0 without HAVE_LOCALE_T */ | ||||||
|  | 			elog(ERROR, "mbstowcs_l is not available"); | ||||||
|  | 			result = 0;				/* keep compiler quiet */ | ||||||
|  | #endif /* HAVE_LOCALE_T */ | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		pfree(str); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if (result == -1) | ||||||
|  | 	{ | ||||||
|  | 		/*
 | ||||||
|  | 		 * Invalid multibyte character encountered.  We try to give a useful | ||||||
|  | 		 * error message by letting pg_verifymbstr check the string.  But it's | ||||||
|  | 		 * possible that the string is OK to us, and not OK to mbstowcs --- | ||||||
|  | 		 * this suggests that the LC_CTYPE locale is different from the | ||||||
|  | 		 * database encoding.  Give a generic error message if verifymbstr | ||||||
|  | 		 * can't find anything wrong. | ||||||
|  | 		 */ | ||||||
|  | 		pg_verifymbstr(from, fromlen, false);	/* might not return */ | ||||||
|  | 		/* but if it does ... */ | ||||||
|  | 		ereport(ERROR, | ||||||
|  | 				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), | ||||||
|  | 				 errmsg("invalid multibyte character for locale"), | ||||||
|  | 				 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding."))); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return result; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | #endif /* USE_WIDE_UPPER_LOWER */ | ||||||
|  | |||||||
| @ -13,7 +13,6 @@ | |||||||
| #include "mb/pg_wchar.h" | #include "mb/pg_wchar.h" | ||||||
| #include "utils/builtins.h" | #include "utils/builtins.h" | ||||||
| #include "utils/memutils.h" | #include "utils/memutils.h" | ||||||
| #include "utils/pg_locale.h" |  | ||||||
| #include "utils/syscache.h" | #include "utils/syscache.h" | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
| @ -689,126 +688,6 @@ perform_default_encoding_conversion(const char *src, int len, bool is_client_to_ | |||||||
| } | } | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| 
 |  | ||||||
| #ifdef USE_WIDE_UPPER_LOWER |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * wchar2char --- convert wide characters to multibyte format |  | ||||||
|  * |  | ||||||
|  * This has the same API as the standard wcstombs() function; in particular, |  | ||||||
|  * tolen is the maximum number of bytes to store at *to, and *from must be |  | ||||||
|  * zero-terminated.  The output will be zero-terminated iff there is room. |  | ||||||
|  */ |  | ||||||
| size_t |  | ||||||
| wchar2char(char *to, const wchar_t *from, size_t tolen, Oid collation) |  | ||||||
| { |  | ||||||
| 	size_t		result; |  | ||||||
| 
 |  | ||||||
| 	if (tolen == 0) |  | ||||||
| 		return 0; |  | ||||||
| 
 |  | ||||||
| #ifdef WIN32 |  | ||||||
| 
 |  | ||||||
| 	/*
 |  | ||||||
| 	 * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and |  | ||||||
| 	 * for some reason mbstowcs and wcstombs won't do this for us, so we use |  | ||||||
| 	 * MultiByteToWideChar(). |  | ||||||
| 	 */ |  | ||||||
| 	if (GetDatabaseEncoding() == PG_UTF8) |  | ||||||
| 	{ |  | ||||||
| 		result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen, |  | ||||||
| 									 NULL, NULL); |  | ||||||
| 		/* A zero return is failure */ |  | ||||||
| 		if (result <= 0) |  | ||||||
| 			result = -1; |  | ||||||
| 		else |  | ||||||
| 		{ |  | ||||||
| 			Assert(result <= tolen); |  | ||||||
| 			/* Microsoft counts the zero terminator in the result */ |  | ||||||
| 			result--; |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	else |  | ||||||
| #endif   /* WIN32 */ |  | ||||||
| 	{ |  | ||||||
| 		Assert(!lc_ctype_is_c(collation)); |  | ||||||
| 		result = wcstombs(to, from, tolen); |  | ||||||
| 	} |  | ||||||
| 	return result; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * char2wchar --- convert multibyte characters to wide characters |  | ||||||
|  * |  | ||||||
|  * This has almost the API of mbstowcs(), except that *from need not be |  | ||||||
|  * null-terminated; instead, the number of input bytes is specified as |  | ||||||
|  * fromlen.  Also, we ereport() rather than returning -1 for invalid |  | ||||||
|  * input encoding.	tolen is the maximum number of wchar_t's to store at *to. |  | ||||||
|  * The output will be zero-terminated iff there is room. |  | ||||||
|  */ |  | ||||||
| size_t |  | ||||||
| char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, Oid collation) |  | ||||||
| { |  | ||||||
| 	size_t		result; |  | ||||||
| 
 |  | ||||||
| 	if (tolen == 0) |  | ||||||
| 		return 0; |  | ||||||
| 
 |  | ||||||
| #ifdef WIN32 |  | ||||||
| 	/* See WIN32 "Unicode" comment above */ |  | ||||||
| 	if (GetDatabaseEncoding() == PG_UTF8) |  | ||||||
| 	{ |  | ||||||
| 		/* Win32 API does not work for zero-length input */ |  | ||||||
| 		if (fromlen == 0) |  | ||||||
| 			result = 0; |  | ||||||
| 		else |  | ||||||
| 		{ |  | ||||||
| 			result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1); |  | ||||||
| 			/* A zero return is failure */ |  | ||||||
| 			if (result == 0) |  | ||||||
| 				result = -1; |  | ||||||
| 		} |  | ||||||
| 
 |  | ||||||
| 		if (result != -1) |  | ||||||
| 		{ |  | ||||||
| 			Assert(result < tolen); |  | ||||||
| 			/* Append trailing null wchar (MultiByteToWideChar() does not) */ |  | ||||||
| 			to[result] = 0; |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	else |  | ||||||
| #endif   /* WIN32 */ |  | ||||||
| 	{ |  | ||||||
| 		/* mbstowcs requires ending '\0' */ |  | ||||||
| 		char	   *str = pnstrdup(from, fromlen); |  | ||||||
| 
 |  | ||||||
| 		Assert(!lc_ctype_is_c(collation)); |  | ||||||
| 		result = mbstowcs(to, str, tolen); |  | ||||||
| 		pfree(str); |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	if (result == -1) |  | ||||||
| 	{ |  | ||||||
| 		/*
 |  | ||||||
| 		 * Invalid multibyte character encountered.  We try to give a useful |  | ||||||
| 		 * error message by letting pg_verifymbstr check the string.  But it's |  | ||||||
| 		 * possible that the string is OK to us, and not OK to mbstowcs --- |  | ||||||
| 		 * this suggests that the LC_CTYPE locale is different from the |  | ||||||
| 		 * database encoding.  Give a generic error message if verifymbstr |  | ||||||
| 		 * can't find anything wrong. |  | ||||||
| 		 */ |  | ||||||
| 		pg_verifymbstr(from, fromlen, false);	/* might not return */ |  | ||||||
| 		/* but if it does ... */ |  | ||||||
| 		ereport(ERROR, |  | ||||||
| 				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), |  | ||||||
| 				 errmsg("invalid multibyte character for locale"), |  | ||||||
| 				 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding."))); |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	return result; |  | ||||||
| } |  | ||||||
| #endif |  | ||||||
| 
 |  | ||||||
| /* convert a multibyte string to a wchar */ | /* convert a multibyte string to a wchar */ | ||||||
| int | int | ||||||
| pg_mb2wchar(const char *from, pg_wchar *to) | pg_mb2wchar(const char *from, pg_wchar *to) | ||||||
|  | |||||||
| @ -19,8 +19,6 @@ | |||||||
| #ifndef PG_WCHAR_H | #ifndef PG_WCHAR_H | ||||||
| #define PG_WCHAR_H | #define PG_WCHAR_H | ||||||
| 
 | 
 | ||||||
| #include <sys/types.h> |  | ||||||
| 
 |  | ||||||
| /*
 | /*
 | ||||||
|  * The pg_wchar type |  * The pg_wchar type | ||||||
|  */ |  */ | ||||||
| @ -392,11 +390,6 @@ extern int	pg_mbcharcliplen(const char *mbstr, int len, int imit); | |||||||
| extern int	pg_encoding_max_length(int encoding); | extern int	pg_encoding_max_length(int encoding); | ||||||
| extern int	pg_database_encoding_max_length(void); | extern int	pg_database_encoding_max_length(void); | ||||||
| 
 | 
 | ||||||
| #ifdef USE_WIDE_UPPER_LOWER |  | ||||||
| extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen, Oid collation); |  | ||||||
| extern size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, Oid collation); |  | ||||||
| #endif |  | ||||||
| 
 |  | ||||||
| extern int	PrepareClientEncoding(int encoding); | extern int	PrepareClientEncoding(int encoding); | ||||||
| extern int	SetClientEncoding(int encoding); | extern int	SetClientEncoding(int encoding); | ||||||
| extern void InitializeClientEncoding(void); | extern void InitializeClientEncoding(void); | ||||||
|  | |||||||
| @ -656,6 +656,9 @@ | |||||||
| /* Define to 1 if you have the `wcstombs' function. */ | /* Define to 1 if you have the `wcstombs' function. */ | ||||||
| #undef HAVE_WCSTOMBS | #undef HAVE_WCSTOMBS | ||||||
| 
 | 
 | ||||||
|  | /* Define to 1 if you have the `wcstombs_l' function. */ | ||||||
|  | #undef HAVE_WCSTOMBS_L | ||||||
|  | 
 | ||||||
| /* Define to 1 if you have the <wctype.h> header file. */ | /* Define to 1 if you have the <wctype.h> header file. */ | ||||||
| #undef HAVE_WCTYPE_H | #undef HAVE_WCTYPE_H | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -538,6 +538,9 @@ | |||||||
| /* Define to 1 if you have the `wcstombs' function. */ | /* Define to 1 if you have the `wcstombs' function. */ | ||||||
| #define HAVE_WCSTOMBS 1 | #define HAVE_WCSTOMBS 1 | ||||||
| 
 | 
 | ||||||
|  | /* Define to 1 if you have the `wcstombs_l' function. */ | ||||||
|  | #define HAVE_WCSTOMBS_L 1 | ||||||
|  | 
 | ||||||
| /* Define to 1 if you have the <wctype.h> header file. */ | /* Define to 1 if you have the <wctype.h> header file. */ | ||||||
| #define HAVE_WCTYPE_H 1 | #define HAVE_WCTYPE_H 1 | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -304,6 +304,8 @@ typedef int pid_t; | |||||||
| #define iswspace_l _iswspace_l | #define iswspace_l _iswspace_l | ||||||
| #define strcoll_l _strcoll_l | #define strcoll_l _strcoll_l | ||||||
| #define wcscoll_l _wcscoll_l | #define wcscoll_l _wcscoll_l | ||||||
|  | #define wcstombs_l _wcstombs_l | ||||||
|  | #define mbstowcs_l _mbstowcs_l | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| /* In backend/port/win32/signal.c */ | /* In backend/port/win32/signal.c */ | ||||||
|  | |||||||
| @ -72,4 +72,12 @@ typedef int pg_locale_t; | |||||||
| 
 | 
 | ||||||
| extern pg_locale_t pg_newlocale_from_collation(Oid collid); | extern pg_locale_t pg_newlocale_from_collation(Oid collid); | ||||||
| 
 | 
 | ||||||
|  | /* These functions convert from/to libc's wchar_t, *not* pg_wchar */ | ||||||
|  | #ifdef USE_WIDE_UPPER_LOWER | ||||||
|  | extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen, | ||||||
|  | 		   pg_locale_t locale); | ||||||
|  | extern size_t char2wchar(wchar_t *to, size_t tolen, | ||||||
|  | 		   const char *from, size_t fromlen, pg_locale_t locale); | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
| #endif   /* _PG_LOCALE_ */ | #endif   /* _PG_LOCALE_ */ | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user