mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-31 00:03:57 -04:00 
			
		
		
		
	Fix char2wchar/wchar2char to support collations properly.
These functions should take a pg_locale_t, not a collation OID, and should call mbstowcs_l/wcstombs_l where available. Where those functions are not available, temporarily select the correct locale with uselocale(). This change removes the bogus assumption that all locales selectable in a given database have the same wide-character conversion method; in particular, the collate.linux.utf8 regression test now passes with LC_CTYPE=C, so long as the database encoding is UTF8. I decided to move the char2wchar/wchar2char functions out of mbutils.c and into pg_locale.c, because they work on wchar_t not pg_wchar and thus don't really belong with the mbutils.c functions. Keeping them where they were would have required importing pg_locale_t into pg_wchar.h somehow, which did not seem like a good plan.
This commit is contained in:
		
							parent
							
								
									bb85030630
								
							
						
					
					
						commit
						2ab0796d7a
					
				
							
								
								
									
										3
									
								
								configure
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								configure
									
									
									
									
										vendored
									
									
								
							| @ -18985,7 +18985,8 @@ fi | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| for ac_func in cbrt dlopen fcvt fdatasync getifaddrs getpeereid getpeerucred getrlimit memmove poll pstat readlink scandir setproctitle setsid sigprocmask symlink sysconf towlower utime utimes waitpid wcstombs | ||||
| 
 | ||||
| for ac_func in cbrt dlopen fcvt fdatasync getifaddrs getpeereid getpeerucred getrlimit memmove poll pstat readlink scandir setproctitle setsid sigprocmask symlink sysconf towlower utime utimes waitpid wcstombs wcstombs_l | ||||
| do | ||||
| as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` | ||||
| { $as_echo "$as_me:$LINENO: checking for $ac_func" >&5 | ||||
|  | ||||
| @ -1187,7 +1187,7 @@ PGAC_VAR_INT_TIMEZONE | ||||
| AC_FUNC_ACCEPT_ARGTYPES | ||||
| PGAC_FUNC_GETTIMEOFDAY_1ARG | ||||
| 
 | ||||
| AC_CHECK_FUNCS([cbrt dlopen fcvt fdatasync getifaddrs getpeereid getpeerucred getrlimit memmove poll pstat readlink scandir setproctitle setsid sigprocmask symlink sysconf towlower utime utimes waitpid wcstombs]) | ||||
| AC_CHECK_FUNCS([cbrt dlopen fcvt fdatasync getifaddrs getpeereid getpeerucred getrlimit memmove poll pstat readlink scandir setproctitle setsid sigprocmask symlink sysconf towlower utime utimes waitpid wcstombs wcstombs_l]) | ||||
| 
 | ||||
| AC_REPLACE_FUNCS(fseeko) | ||||
| case $host_os in | ||||
|  | ||||
| @ -29,11 +29,12 @@ t_isdigit(const char *ptr) | ||||
| 	int			clen = pg_mblen(ptr); | ||||
| 	wchar_t		character[2]; | ||||
| 	Oid			collation = DEFAULT_COLLATION_OID;		/* TODO */ | ||||
| 	pg_locale_t	mylocale = 0;	/* TODO */ | ||||
| 
 | ||||
| 	if (clen == 1 || lc_ctype_is_c(collation)) | ||||
| 		return isdigit(TOUCHAR(ptr)); | ||||
| 
 | ||||
| 	char2wchar(character, 2, ptr, clen, collation); | ||||
| 	char2wchar(character, 2, ptr, clen, mylocale); | ||||
| 
 | ||||
| 	return iswdigit((wint_t) character[0]); | ||||
| } | ||||
| @ -44,11 +45,12 @@ t_isspace(const char *ptr) | ||||
| 	int			clen = pg_mblen(ptr); | ||||
| 	wchar_t		character[2]; | ||||
| 	Oid			collation = DEFAULT_COLLATION_OID;		/* TODO */ | ||||
| 	pg_locale_t	mylocale = 0;	/* TODO */ | ||||
| 
 | ||||
| 	if (clen == 1 || lc_ctype_is_c(collation)) | ||||
| 		return isspace(TOUCHAR(ptr)); | ||||
| 
 | ||||
| 	char2wchar(character, 2, ptr, clen, collation); | ||||
| 	char2wchar(character, 2, ptr, clen, mylocale); | ||||
| 
 | ||||
| 	return iswspace((wint_t) character[0]); | ||||
| } | ||||
| @ -59,11 +61,12 @@ t_isalpha(const char *ptr) | ||||
| 	int			clen = pg_mblen(ptr); | ||||
| 	wchar_t		character[2]; | ||||
| 	Oid			collation = DEFAULT_COLLATION_OID;		/* TODO */ | ||||
| 	pg_locale_t	mylocale = 0;	/* TODO */ | ||||
| 
 | ||||
| 	if (clen == 1 || lc_ctype_is_c(collation)) | ||||
| 		return isalpha(TOUCHAR(ptr)); | ||||
| 
 | ||||
| 	char2wchar(character, 2, ptr, clen, collation); | ||||
| 	char2wchar(character, 2, ptr, clen, mylocale); | ||||
| 
 | ||||
| 	return iswalpha((wint_t) character[0]); | ||||
| } | ||||
| @ -74,11 +77,12 @@ t_isprint(const char *ptr) | ||||
| 	int			clen = pg_mblen(ptr); | ||||
| 	wchar_t		character[2]; | ||||
| 	Oid			collation = DEFAULT_COLLATION_OID;		/* TODO */ | ||||
| 	pg_locale_t	mylocale = 0;	/* TODO */ | ||||
| 
 | ||||
| 	if (clen == 1 || lc_ctype_is_c(collation)) | ||||
| 		return isprint(TOUCHAR(ptr)); | ||||
| 
 | ||||
| 	char2wchar(character, 2, ptr, clen, collation); | ||||
| 	char2wchar(character, 2, ptr, clen, mylocale); | ||||
| 
 | ||||
| 	return iswprint((wint_t) character[0]); | ||||
| } | ||||
| @ -246,6 +250,7 @@ lowerstr_with_len(const char *str, int len) | ||||
| 
 | ||||
| #ifdef USE_WIDE_UPPER_LOWER | ||||
| 	Oid			collation = DEFAULT_COLLATION_OID;		/* TODO */ | ||||
| 	pg_locale_t	mylocale = 0;	/* TODO */ | ||||
| #endif | ||||
| 
 | ||||
| 	if (len == 0) | ||||
| @ -272,7 +277,7 @@ lowerstr_with_len(const char *str, int len) | ||||
| 		 */ | ||||
| 		wptr = wstr = (wchar_t *) palloc(sizeof(wchar_t) * (len + 1)); | ||||
| 
 | ||||
| 		wlen = char2wchar(wstr, len + 1, str, len, collation); | ||||
| 		wlen = char2wchar(wstr, len + 1, str, len, mylocale); | ||||
| 		Assert(wlen <= len); | ||||
| 
 | ||||
| 		while (*wptr) | ||||
| @ -287,7 +292,7 @@ lowerstr_with_len(const char *str, int len) | ||||
| 		len = pg_database_encoding_max_length() * wlen + 1; | ||||
| 		out = (char *) palloc(len); | ||||
| 
 | ||||
| 		wlen = wchar2char(out, wstr, len, collation); | ||||
| 		wlen = wchar2char(out, wstr, len, mylocale); | ||||
| 
 | ||||
| 		pfree(wstr); | ||||
| 
 | ||||
|  | ||||
| @ -300,13 +300,14 @@ TParserInit(char *str, int len) | ||||
| 	if (prs->charmaxlen > 1) | ||||
| 	{ | ||||
| 		Oid			collation = DEFAULT_COLLATION_OID;	/* TODO */ | ||||
| 		pg_locale_t	mylocale = 0;	/* TODO */ | ||||
| 
 | ||||
| 		prs->usewide = true; | ||||
| 		if (lc_ctype_is_c(collation)) | ||||
| 		{ | ||||
| 			/*
 | ||||
| 			 * char2wchar doesn't work for C-locale and sizeof(pg_wchar) could | ||||
| 			 * be not equal to sizeof(wchar_t) | ||||
| 			 * be different from sizeof(wchar_t) | ||||
| 			 */ | ||||
| 			prs->pgwstr = (pg_wchar *) palloc(sizeof(pg_wchar) * (prs->lenstr + 1)); | ||||
| 			pg_mb2wchar_with_len(prs->str, prs->pgwstr, prs->lenstr); | ||||
| @ -314,7 +315,8 @@ TParserInit(char *str, int len) | ||||
| 		else | ||||
| 		{ | ||||
| 			prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * (prs->lenstr + 1)); | ||||
| 			char2wchar(prs->wstr, prs->lenstr + 1, prs->str, prs->lenstr, collation); | ||||
| 			char2wchar(prs->wstr, prs->lenstr + 1, prs->str, prs->lenstr, | ||||
| 					   mylocale); | ||||
| 		} | ||||
| 	} | ||||
| 	else | ||||
|  | ||||
| @ -1454,6 +1454,10 @@ str_numth(char *dest, char *num, int type) | ||||
| 	return dest; | ||||
| } | ||||
| 
 | ||||
| /*****************************************************************************
 | ||||
|  *			upper/lower/initcap functions | ||||
|  *****************************************************************************/ | ||||
| 
 | ||||
| /*
 | ||||
|  * If the system provides the needed functions for wide-character manipulation | ||||
|  * (which are all standardized by C99), then we implement upper/lower/initcap | ||||
| @ -1527,7 +1531,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) | ||||
| 		/* Output workspace cannot have more codes than input bytes */ | ||||
| 		workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); | ||||
| 
 | ||||
| 		char2wchar(workspace, nbytes + 1, buff, nbytes, collid); | ||||
| 		char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); | ||||
| 
 | ||||
| 		for (curr_char = 0; workspace[curr_char] != 0; curr_char++) | ||||
| 		{ | ||||
| @ -1543,7 +1547,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) | ||||
| 		result_size = curr_char * pg_database_encoding_max_length() + 1; | ||||
| 		result = palloc(result_size); | ||||
| 
 | ||||
| 		wchar2char(result, workspace, result_size, collid); | ||||
| 		wchar2char(result, workspace, result_size, mylocale); | ||||
| 		pfree(workspace); | ||||
| 	} | ||||
| #endif   /* USE_WIDE_UPPER_LOWER */ | ||||
| @ -1648,7 +1652,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) | ||||
| 		/* Output workspace cannot have more codes than input bytes */ | ||||
| 		workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); | ||||
| 
 | ||||
| 		char2wchar(workspace, nbytes + 1, buff, nbytes, collid); | ||||
| 		char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); | ||||
| 
 | ||||
| 		for (curr_char = 0; workspace[curr_char] != 0; curr_char++) | ||||
| 		{ | ||||
| @ -1664,7 +1668,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) | ||||
| 		result_size = curr_char * pg_database_encoding_max_length() + 1; | ||||
| 		result = palloc(result_size); | ||||
| 
 | ||||
| 		wchar2char(result, workspace, result_size, collid); | ||||
| 		wchar2char(result, workspace, result_size, mylocale); | ||||
| 		pfree(workspace); | ||||
| 	} | ||||
| #endif   /* USE_WIDE_UPPER_LOWER */ | ||||
| @ -1781,7 +1785,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) | ||||
| 		/* Output workspace cannot have more codes than input bytes */ | ||||
| 		workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); | ||||
| 
 | ||||
| 		char2wchar(workspace, nbytes + 1, buff, nbytes, collid); | ||||
| 		char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); | ||||
| 
 | ||||
| 		for (curr_char = 0; workspace[curr_char] != 0; curr_char++) | ||||
| 		{ | ||||
| @ -1809,7 +1813,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) | ||||
| 		result_size = curr_char * pg_database_encoding_max_length() + 1; | ||||
| 		result = palloc(result_size); | ||||
| 
 | ||||
| 		wchar2char(result, workspace, result_size, collid); | ||||
| 		wchar2char(result, workspace, result_size, mylocale); | ||||
| 		pfree(workspace); | ||||
| 	} | ||||
| #endif   /* USE_WIDE_UPPER_LOWER */ | ||||
|  | ||||
| @ -1030,3 +1030,176 @@ pg_newlocale_from_collation(Oid collid) | ||||
| 
 | ||||
| 	return cache_entry->locale; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| /*
 | ||||
|  * These functions convert from/to libc's wchar_t, *not* pg_wchar. | ||||
|  * Therefore we keep them here rather than with the mbutils code. | ||||
|  */ | ||||
| 
 | ||||
| #ifdef USE_WIDE_UPPER_LOWER | ||||
| 
 | ||||
| /*
 | ||||
|  * wchar2char --- convert wide characters to multibyte format | ||||
|  * | ||||
|  * This has the same API as the standard wcstombs_l() function; in particular, | ||||
|  * tolen is the maximum number of bytes to store at *to, and *from must be | ||||
|  * zero-terminated.  The output will be zero-terminated iff there is room. | ||||
|  * | ||||
|  * locale selects the conversion rules: 0 means "whatever locale is current" | ||||
|  * and is handled with plain wcstombs(); a nonzero value is passed to | ||||
|  * wcstombs_l() when HAVE_WCSTOMBS_L is defined, and otherwise is installed | ||||
|  * temporarily with uselocale() around a wcstombs() call.  A nonzero locale in | ||||
|  * a build without HAVE_LOCALE_T raises an error. | ||||
|  * | ||||
|  * On WIN32 with a UTF8 database encoding the locale argument is not consulted | ||||
|  * at all; WideCharToMultiByte() does the conversion. | ||||
|  * | ||||
|  * Returns 0 immediately when tolen is 0.  In the WIN32/UTF8 path the result | ||||
|  * is the number of bytes produced, not counting the terminator, or -1 | ||||
|  * (converted to size_t) on failure.  In the libc paths, whatever wcstombs() | ||||
|  * or wcstombs_l() returned is passed through unchanged.  Unlike char2wchar, | ||||
|  * no error is raised here for a failed conversion; the caller must check. | ||||
|  */ | ||||
| size_t | ||||
| wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale) | ||||
| { | ||||
| 	size_t		result; | ||||
| 
 | ||||
| 	if (tolen == 0) | ||||
| 		return 0; | ||||
| 
 | ||||
| #ifdef WIN32 | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and | ||||
| 	 * for some reason mbstowcs and wcstombs won't do this for us, so we use | ||||
| 	 * WideCharToMultiByte() here (and MultiByteToWideChar() in char2wchar). | ||||
| 	 */ | ||||
| 	if (GetDatabaseEncoding() == PG_UTF8) | ||||
| 	{ | ||||
| 		result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen, | ||||
| 									 NULL, NULL); | ||||
| 		/* A zero return is failure; result is unsigned, so "<= 0" means "== 0" */ | ||||
| 		if (result <= 0) | ||||
| 			result = -1; | ||||
| 		else | ||||
| 		{ | ||||
| 			Assert(result <= tolen); | ||||
| 			/* Microsoft counts the zero terminator in the result; we do not */ | ||||
| 			result--; | ||||
| 		} | ||||
| 	} | ||||
| 	else | ||||
| #endif   /* WIN32 */ | ||||
| 	if (locale == (pg_locale_t) 0) | ||||
| 	{ | ||||
| 		/* Use wcstombs directly for the default locale */ | ||||
| 		result = wcstombs(to, from, tolen); | ||||
| 	} | ||||
| 	else | ||||
| 	{ | ||||
| #ifdef HAVE_LOCALE_T | ||||
| #ifdef HAVE_WCSTOMBS_L | ||||
| 		/* Use wcstombs_l for nondefault locales */ | ||||
| 		result = wcstombs_l(to, from, tolen, locale); | ||||
| #else /* !HAVE_WCSTOMBS_L */ | ||||
| 		/* We have to temporarily set the locale as current ... ugh; the value uselocale() hands back is reinstated right after the conversion */ | ||||
| 		locale_t	save_locale = uselocale(locale); | ||||
| 
 | ||||
| 		result = wcstombs(to, from, tolen); | ||||
| 
 | ||||
| 		uselocale(save_locale); | ||||
| #endif /* HAVE_WCSTOMBS_L */ | ||||
| #else /* !HAVE_LOCALE_T */ | ||||
| 		/* Can't have locale != 0 without HAVE_LOCALE_T */ | ||||
| 		elog(ERROR, "wcstombs_l is not available"); | ||||
| 		result = 0;				/* keep compiler quiet */ | ||||
| #endif /* HAVE_LOCALE_T */ | ||||
| 	} | ||||
| 
 | ||||
| 	return result; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * char2wchar --- convert multibyte characters to wide characters | ||||
|  * | ||||
|  * This has almost the API of mbstowcs_l(), except that *from need not be | ||||
|  * null-terminated; instead, the number of input bytes is specified as | ||||
|  * fromlen.  Also, we ereport() rather than returning -1 for invalid | ||||
|  * input encoding.  tolen is the maximum number of wchar_t's to store at *to. | ||||
|  * The output will be zero-terminated iff there is room. | ||||
|  * | ||||
|  * locale selects the conversion rules: 0 means "whatever locale is current" | ||||
|  * and is handled with plain mbstowcs(); a nonzero value is passed to | ||||
|  * mbstowcs_l(), or, lacking that, installed temporarily with uselocale() | ||||
|  * around an mbstowcs() call.  A nonzero locale in a build without | ||||
|  * HAVE_LOCALE_T raises an error.  On WIN32 with a UTF8 database encoding the | ||||
|  * locale argument is not consulted; MultiByteToWideChar() is used instead. | ||||
|  * | ||||
|  * NOTE(review): the mbstowcs_l() call is guarded by HAVE_WCSTOMBS_L, i.e. the | ||||
|  * two *_l functions are assumed to come as a pair; no separate test for | ||||
|  * mbstowcs_l is visible here. | ||||
|  * | ||||
|  * Returns 0 immediately when tolen is 0; otherwise returns the conversion | ||||
|  * result, and does not return at all if that result is -1. | ||||
|  */ | ||||
| size_t | ||||
| char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, | ||||
| 		   pg_locale_t locale) | ||||
| { | ||||
| 	size_t		result; | ||||
| 
 | ||||
| 	if (tolen == 0) | ||||
| 		return 0; | ||||
| 
 | ||||
| #ifdef WIN32 | ||||
| 	/* See WIN32 "Unicode" comment above */ | ||||
| 	if (GetDatabaseEncoding() == PG_UTF8) | ||||
| 	{ | ||||
| 		/* Win32 API does not work for zero-length input */ | ||||
| 		if (fromlen == 0) | ||||
| 			result = 0; | ||||
| 		else | ||||
| 		{ | ||||
| 			result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1); | ||||
| 			/* A zero return is failure; flag it with -1 for the common check below */ | ||||
| 			if (result == 0) | ||||
| 				result = -1; | ||||
| 		} | ||||
| 
 | ||||
| 		if (result != -1) | ||||
| 		{ | ||||
| 			Assert(result < tolen); | ||||
| 			/* Append trailing null wchar (MultiByteToWideChar() does not); one slot was held back by passing tolen - 1 */ | ||||
| 			to[result] = 0; | ||||
| 		} | ||||
| 	} | ||||
| 	else | ||||
| #endif   /* WIN32 */ | ||||
| 	{ | ||||
| 		/* mbstowcs requires ending '\0', so convert from a copy made with pnstrdup (freed below) */ | ||||
| 		char	   *str = pnstrdup(from, fromlen); | ||||
| 
 | ||||
| 		if (locale == (pg_locale_t) 0) | ||||
| 		{ | ||||
| 			/* Use mbstowcs directly for the default locale */ | ||||
| 			result = mbstowcs(to, str, tolen); | ||||
| 		} | ||||
| 		else | ||||
| 		{ | ||||
| #ifdef HAVE_LOCALE_T | ||||
| #ifdef HAVE_WCSTOMBS_L | ||||
| 			/* Use mbstowcs_l for nondefault locales */ | ||||
| 			result = mbstowcs_l(to, str, tolen, locale); | ||||
| #else /* !HAVE_WCSTOMBS_L */ | ||||
| 			/* We have to temporarily set the locale as current ... ugh; the value uselocale() hands back is reinstated right after the conversion */ | ||||
| 			locale_t	save_locale = uselocale(locale); | ||||
| 
 | ||||
| 			result = mbstowcs(to, str, tolen); | ||||
| 
 | ||||
| 			uselocale(save_locale); | ||||
| #endif /* HAVE_WCSTOMBS_L */ | ||||
| #else /* !HAVE_LOCALE_T */ | ||||
| 			/* Can't have locale != 0 without HAVE_LOCALE_T */ | ||||
| 			elog(ERROR, "mbstowcs_l is not available"); | ||||
| 			result = 0;				/* keep compiler quiet */ | ||||
| #endif /* HAVE_LOCALE_T */ | ||||
| 		} | ||||
| 
 | ||||
| 		pfree(str); | ||||
| 	} | ||||
| 
 | ||||
| 	if (result == -1) | ||||
| 	{ | ||||
| 		/*
 | ||||
| 		 * Invalid multibyte character encountered.  We try to give a useful | ||||
| 		 * error message by letting pg_verifymbstr check the string.  But it's | ||||
| 		 * possible that the string is OK to us, and not OK to mbstowcs --- | ||||
| 		 * this suggests that the LC_CTYPE locale is different from the | ||||
| 		 * database encoding.  Give a generic error message if verifymbstr | ||||
| 		 * can't find anything wrong. | ||||
| 		 */ | ||||
| 		pg_verifymbstr(from, fromlen, false);	/* might not return */ | ||||
| 		/* but if it does ... */ | ||||
| 		ereport(ERROR, | ||||
| 				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), | ||||
| 				 errmsg("invalid multibyte character for locale"), | ||||
| 				 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding."))); | ||||
| 	} | ||||
| 
 | ||||
| 	return result; | ||||
| } | ||||
| 
 | ||||
| #endif /* USE_WIDE_UPPER_LOWER */ | ||||
|  | ||||
| @ -13,7 +13,6 @@ | ||||
| #include "mb/pg_wchar.h" | ||||
| #include "utils/builtins.h" | ||||
| #include "utils/memutils.h" | ||||
| #include "utils/pg_locale.h" | ||||
| #include "utils/syscache.h" | ||||
| 
 | ||||
| /*
 | ||||
| @ -689,126 +688,6 @@ perform_default_encoding_conversion(const char *src, int len, bool is_client_to_ | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| #ifdef USE_WIDE_UPPER_LOWER | ||||
| 
 | ||||
| /*
 | ||||
|  * wchar2char --- convert wide characters to multibyte format | ||||
|  * | ||||
|  * This has the same API as the standard wcstombs() function; in particular, | ||||
|  * tolen is the maximum number of bytes to store at *to, and *from must be | ||||
|  * zero-terminated.  The output will be zero-terminated iff there is room. | ||||
|  */ | ||||
| size_t | ||||
| wchar2char(char *to, const wchar_t *from, size_t tolen, Oid collation) | ||||
| { | ||||
| 	size_t		result; | ||||
| 
 | ||||
| 	if (tolen == 0) | ||||
| 		return 0; | ||||
| 
 | ||||
| #ifdef WIN32 | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and | ||||
| 	 * for some reason mbstowcs and wcstombs won't do this for us, so we use | ||||
| 	 * MultiByteToWideChar(). | ||||
| 	 */ | ||||
| 	if (GetDatabaseEncoding() == PG_UTF8) | ||||
| 	{ | ||||
| 		result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen, | ||||
| 									 NULL, NULL); | ||||
| 		/* A zero return is failure */ | ||||
| 		if (result <= 0) | ||||
| 			result = -1; | ||||
| 		else | ||||
| 		{ | ||||
| 			Assert(result <= tolen); | ||||
| 			/* Microsoft counts the zero terminator in the result */ | ||||
| 			result--; | ||||
| 		} | ||||
| 	} | ||||
| 	else | ||||
| #endif   /* WIN32 */ | ||||
| 	{ | ||||
| 		Assert(!lc_ctype_is_c(collation)); | ||||
| 		result = wcstombs(to, from, tolen); | ||||
| 	} | ||||
| 	return result; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * char2wchar --- convert multibyte characters to wide characters | ||||
|  * | ||||
|  * This has almost the API of mbstowcs(), except that *from need not be | ||||
|  * null-terminated; instead, the number of input bytes is specified as | ||||
|  * fromlen.  Also, we ereport() rather than returning -1 for invalid | ||||
|  * input encoding.	tolen is the maximum number of wchar_t's to store at *to. | ||||
|  * The output will be zero-terminated iff there is room. | ||||
|  */ | ||||
| size_t | ||||
| char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, Oid collation) | ||||
| { | ||||
| 	size_t		result; | ||||
| 
 | ||||
| 	if (tolen == 0) | ||||
| 		return 0; | ||||
| 
 | ||||
| #ifdef WIN32 | ||||
| 	/* See WIN32 "Unicode" comment above */ | ||||
| 	if (GetDatabaseEncoding() == PG_UTF8) | ||||
| 	{ | ||||
| 		/* Win32 API does not work for zero-length input */ | ||||
| 		if (fromlen == 0) | ||||
| 			result = 0; | ||||
| 		else | ||||
| 		{ | ||||
| 			result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1); | ||||
| 			/* A zero return is failure */ | ||||
| 			if (result == 0) | ||||
| 				result = -1; | ||||
| 		} | ||||
| 
 | ||||
| 		if (result != -1) | ||||
| 		{ | ||||
| 			Assert(result < tolen); | ||||
| 			/* Append trailing null wchar (MultiByteToWideChar() does not) */ | ||||
| 			to[result] = 0; | ||||
| 		} | ||||
| 	} | ||||
| 	else | ||||
| #endif   /* WIN32 */ | ||||
| 	{ | ||||
| 		/* mbstowcs requires ending '\0' */ | ||||
| 		char	   *str = pnstrdup(from, fromlen); | ||||
| 
 | ||||
| 		Assert(!lc_ctype_is_c(collation)); | ||||
| 		result = mbstowcs(to, str, tolen); | ||||
| 		pfree(str); | ||||
| 	} | ||||
| 
 | ||||
| 	if (result == -1) | ||||
| 	{ | ||||
| 		/*
 | ||||
| 		 * Invalid multibyte character encountered.  We try to give a useful | ||||
| 		 * error message by letting pg_verifymbstr check the string.  But it's | ||||
| 		 * possible that the string is OK to us, and not OK to mbstowcs --- | ||||
| 		 * this suggests that the LC_CTYPE locale is different from the | ||||
| 		 * database encoding.  Give a generic error message if verifymbstr | ||||
| 		 * can't find anything wrong. | ||||
| 		 */ | ||||
| 		pg_verifymbstr(from, fromlen, false);	/* might not return */ | ||||
| 		/* but if it does ... */ | ||||
| 		ereport(ERROR, | ||||
| 				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), | ||||
| 				 errmsg("invalid multibyte character for locale"), | ||||
| 				 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding."))); | ||||
| 	} | ||||
| 
 | ||||
| 	return result; | ||||
| } | ||||
| #endif | ||||
| 
 | ||||
| /* convert a multibyte string to a wchar */ | ||||
| int | ||||
| pg_mb2wchar(const char *from, pg_wchar *to) | ||||
|  | ||||
| @ -19,8 +19,6 @@ | ||||
| #ifndef PG_WCHAR_H | ||||
| #define PG_WCHAR_H | ||||
| 
 | ||||
| #include <sys/types.h> | ||||
| 
 | ||||
| /*
 | ||||
|  * The pg_wchar type | ||||
|  */ | ||||
| @ -392,11 +390,6 @@ extern int	pg_mbcharcliplen(const char *mbstr, int len, int imit); | ||||
| extern int	pg_encoding_max_length(int encoding); | ||||
| extern int	pg_database_encoding_max_length(void); | ||||
| 
 | ||||
| #ifdef USE_WIDE_UPPER_LOWER | ||||
| extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen, Oid collation); | ||||
| extern size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, Oid collation); | ||||
| #endif | ||||
| 
 | ||||
| extern int	PrepareClientEncoding(int encoding); | ||||
| extern int	SetClientEncoding(int encoding); | ||||
| extern void InitializeClientEncoding(void); | ||||
|  | ||||
| @ -656,6 +656,9 @@ | ||||
| /* Define to 1 if you have the `wcstombs' function. */ | ||||
| #undef HAVE_WCSTOMBS | ||||
| 
 | ||||
| /* Define to 1 if you have the `wcstombs_l' function. */ | ||||
| #undef HAVE_WCSTOMBS_L | ||||
| 
 | ||||
| /* Define to 1 if you have the <wctype.h> header file. */ | ||||
| #undef HAVE_WCTYPE_H | ||||
| 
 | ||||
|  | ||||
| @ -538,6 +538,9 @@ | ||||
| /* Define to 1 if you have the `wcstombs' function. */ | ||||
| #define HAVE_WCSTOMBS 1 | ||||
| 
 | ||||
| /* Define to 1 if you have the `wcstombs_l' function. */ | ||||
| #define HAVE_WCSTOMBS_L 1 | ||||
| 
 | ||||
| /* Define to 1 if you have the <wctype.h> header file. */ | ||||
| #define HAVE_WCTYPE_H 1 | ||||
| 
 | ||||
|  | ||||
| @ -304,6 +304,8 @@ typedef int pid_t; | ||||
| #define iswspace_l _iswspace_l | ||||
| #define strcoll_l _strcoll_l | ||||
| #define wcscoll_l _wcscoll_l | ||||
| #define wcstombs_l _wcstombs_l | ||||
| #define mbstowcs_l _mbstowcs_l | ||||
| 
 | ||||
| 
 | ||||
| /* In backend/port/win32/signal.c */ | ||||
|  | ||||
| @ -72,4 +72,12 @@ typedef int pg_locale_t; | ||||
| 
 | ||||
| extern pg_locale_t pg_newlocale_from_collation(Oid collid); | ||||
| 
 | ||||
| /* These functions convert from/to libc's wchar_t, *not* pg_wchar */ | ||||
| #ifdef USE_WIDE_UPPER_LOWER | ||||
| extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen, | ||||
| 		   pg_locale_t locale); | ||||
| extern size_t char2wchar(wchar_t *to, size_t tolen, | ||||
| 		   const char *from, size_t fromlen, pg_locale_t locale); | ||||
| #endif | ||||
| 
 | ||||
| #endif   /* _PG_LOCALE_ */ | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user