mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-04 00:02:52 -05:00 
			
		
		
		
	Move wchar2char() and char2wchar() from tsearch into /mb to be easier to
use for other modules; also move pnstrdup(). Clean up code slightly.
This commit is contained in:
		
							parent
							
								
									3eb9da524d
								
							
						
					
					
						commit
						9de09c087d
					
				@ -7,7 +7,7 @@
 | 
			
		||||
 *
 | 
			
		||||
 *
 | 
			
		||||
 * IDENTIFICATION
 | 
			
		||||
 *	  $PostgreSQL: pgsql/src/backend/tsearch/ts_locale.c,v 1.8 2008/06/17 16:09:06 momjian Exp $
 | 
			
		||||
 *	  $PostgreSQL: pgsql/src/backend/tsearch/ts_locale.c,v 1.9 2008/06/18 18:42:54 momjian Exp $
 | 
			
		||||
 *
 | 
			
		||||
 *-------------------------------------------------------------------------
 | 
			
		||||
 */
 | 
			
		||||
@ -16,125 +16,8 @@
 | 
			
		||||
#include "tsearch/ts_locale.h"
 | 
			
		||||
#include "tsearch/ts_public.h"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#ifdef USE_WIDE_UPPER_LOWER
 | 
			
		||||
 | 
			
		||||
/*
 * wchar2char
 *		Turn a zero-terminated wide-character string into multibyte form.
 *
 * The calling convention mirrors the standard wcstombs(): at most tolen
 * bytes are stored at *to, and the output is zero-terminated only if room
 * is left over.  The result is the number of bytes stored, or (size_t) -1
 * when the conversion fails.  A tolen of zero returns 0 immediately.
 */
size_t
wchar2char(char *to, const wchar_t *from, size_t tolen)
{
	if (tolen != 0)
	{
#ifdef WIN32
		if (GetDatabaseEncoding() == PG_UTF8)
		{
			int			nbytes = WideCharToMultiByte(CP_UTF8, 0, from, -1,
													 to, tolen, NULL, NULL);

			if (nbytes <= 0)
				return (size_t) -1;

			Assert(nbytes <= tolen);

			/* Microsoft counts the zero terminator in the result */
			return nbytes - 1;
		}
#endif   /* WIN32 */

		return wcstombs(to, from, tolen);
	}

	/* no output space at all: nothing to convert */
	return 0;
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * char2wchar --- convert multibyte characters to wide characters
 | 
			
		||||
 *
 | 
			
		||||
 * This has almost the API of mbstowcs(), except that *from need not be
 | 
			
		||||
 * null-terminated; instead, the number of input bytes is specified as
 | 
			
		||||
 * fromlen.  Also, we ereport() rather than returning -1 for invalid
 | 
			
		||||
 * input encoding.	tolen is the maximum number of wchar_t's to store at *to.
 | 
			
		||||
 * The output will be zero-terminated iff there is room.
 | 
			
		||||
 */
 | 
			
		||||
size_t
 | 
			
		||||
char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen)
 | 
			
		||||
{
 | 
			
		||||
	if (tolen == 0)
 | 
			
		||||
		return 0;
 | 
			
		||||
 | 
			
		||||
#ifdef WIN32
 | 
			
		||||
	if (GetDatabaseEncoding() == PG_UTF8)
 | 
			
		||||
	{
 | 
			
		||||
		int			r;
 | 
			
		||||
 | 
			
		||||
		/* stupid Microsloth API does not work for zero-length input */
 | 
			
		||||
		if (fromlen == 0)
 | 
			
		||||
			r = 0;
 | 
			
		||||
		else
 | 
			
		||||
		{
 | 
			
		||||
			r = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
 | 
			
		||||
 | 
			
		||||
			if (r <= 0)
 | 
			
		||||
			{
 | 
			
		||||
				/* see notes in oracle_compat.c about error reporting */
 | 
			
		||||
				pg_verifymbstr(from, fromlen, false);
 | 
			
		||||
				ereport(ERROR,
 | 
			
		||||
						(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
 | 
			
		||||
						 errmsg("invalid multibyte character for locale"),
 | 
			
		||||
						 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		Assert(r < tolen);
 | 
			
		||||
		to[r] = 0;
 | 
			
		||||
 | 
			
		||||
		return r;
 | 
			
		||||
	}
 | 
			
		||||
#endif   /* WIN32 */
 | 
			
		||||
 | 
			
		||||
	if (lc_ctype_is_c())
 | 
			
		||||
	{
 | 
			
		||||
		/*
 | 
			
		||||
		 * pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be
 | 
			
		||||
		 * allocated with sufficient space
 | 
			
		||||
		 */
 | 
			
		||||
		return pg_mb2wchar_with_len(from, (pg_wchar *) to, fromlen);
 | 
			
		||||
	}
 | 
			
		||||
	else
 | 
			
		||||
	{
 | 
			
		||||
		/*
 | 
			
		||||
		 * mbstowcs requires ending '\0'
 | 
			
		||||
		 */
 | 
			
		||||
		char	   *str = pnstrdup(from, fromlen);
 | 
			
		||||
		size_t		result;
 | 
			
		||||
 | 
			
		||||
		result = mbstowcs(to, str, tolen);
 | 
			
		||||
 | 
			
		||||
		pfree(str);
 | 
			
		||||
 | 
			
		||||
		if (result == (size_t) -1)
 | 
			
		||||
		{
 | 
			
		||||
			pg_verifymbstr(from, fromlen, false);
 | 
			
		||||
			ereport(ERROR,
 | 
			
		||||
					(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
 | 
			
		||||
					 errmsg("invalid multibyte character for locale"),
 | 
			
		||||
					 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if (result < tolen)
 | 
			
		||||
			to[result] = 0;
 | 
			
		||||
 | 
			
		||||
		return result;
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
int
 | 
			
		||||
t_isdigit(const char *ptr)
 | 
			
		||||
{
 | 
			
		||||
 | 
			
		||||
@ -7,7 +7,7 @@
 | 
			
		||||
 *
 | 
			
		||||
 *
 | 
			
		||||
 * IDENTIFICATION
 | 
			
		||||
 *	  $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.9 2008/01/01 19:45:52 momjian Exp $
 | 
			
		||||
 *	  $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.10 2008/06/18 18:42:54 momjian Exp $
 | 
			
		||||
 *
 | 
			
		||||
 *-------------------------------------------------------------------------
 | 
			
		||||
 */
 | 
			
		||||
@ -153,13 +153,3 @@ searchstoplist(StopList *s, char *key)
 | 
			
		||||
			bsearch(&key, s->stop, s->len,
 | 
			
		||||
					sizeof(char *), comparestr)) ? true : false;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * pnstrdup
 *		Copy exactly len bytes from "in" into palloc'd memory and terminate
 *		the copy with a null byte.
 *
 * The input does not need to be null-terminated.  len is expected to be
 * non-negative; no check is made here.
 */
char *
pnstrdup(const char *in, int len)
{
	char	   *copy;

	copy = palloc(len + 1);		/* one extra byte for the terminator */
	memcpy(copy, in, len);
	copy[len] = '\0';

	return copy;
}
 | 
			
		||||
 | 
			
		||||
@ -4,7 +4,7 @@
 | 
			
		||||
 * (currently mule internal code (mic) is used)
 | 
			
		||||
 * Tatsuo Ishii
 | 
			
		||||
 *
 | 
			
		||||
 * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.71 2008/05/27 12:24:42 mha Exp $
 | 
			
		||||
 * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.72 2008/06/18 18:42:54 momjian Exp $
 | 
			
		||||
 */
 | 
			
		||||
#include "postgres.h"
 | 
			
		||||
 | 
			
		||||
@ -555,6 +555,134 @@ perform_default_encoding_conversion(const char *src, int len, bool is_client_to_
 | 
			
		||||
	return result;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#ifdef USE_WIDE_UPPER_LOWER
 | 
			
		||||
 | 
			
		||||
/*
 * wchar2char --- convert wide characters to multibyte format
 *
 * This has the same API as the standard wcstombs() function; in particular,
 * tolen is the maximum number of bytes to store at *to, and *from must be
 * zero-terminated.  The output will be zero-terminated iff there is room.
 *
 * Returns the number of bytes stored, or (size_t) -1 if the conversion
 * fails.  When tolen is zero, nothing is written and 0 is returned.
 */
size_t
wchar2char(char *to, const wchar_t *from, size_t tolen)
{
	size_t		result;

	if (tolen == 0)
		return 0;

#ifdef WIN32

	/*
	 * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
	 * for some reason mbstowcs and wcstombs won't do this for us, so we use
	 * WideCharToMultiByte().
	 */
	if (GetDatabaseEncoding() == PG_UTF8)
	{
		/*
		 * Hold the API's return value in a signed int.  Storing it directly
		 * in the unsigned "result" would make the "<= 0" failure test miss
		 * any negative value.
		 */
		int			r;

		r = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
								NULL, NULL);

		/* A zero (or negative) return is failure */
		if (r <= 0)
			result = (size_t) -1;
		else
		{
			Assert(r <= tolen);
			/* Microsoft counts the zero terminator in the result */
			result = r - 1;
		}
	}
	else
#endif   /* WIN32 */
		result = wcstombs(to, from, tolen);

	return result;
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * char2wchar --- convert multibyte characters to wide characters
 | 
			
		||||
 *
 | 
			
		||||
 * This has almost the API of mbstowcs(), except that *from need not be
 | 
			
		||||
 * null-terminated; instead, the number of input bytes is specified as
 | 
			
		||||
 * fromlen.  Also, we ereport() rather than returning -1 for invalid
 | 
			
		||||
 * input encoding.	tolen is the maximum number of wchar_t's to store at *to.
 | 
			
		||||
 * The output will be zero-terminated iff there is room.
 | 
			
		||||
 */
 | 
			
		||||
size_t
 | 
			
		||||
char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen)
 | 
			
		||||
{
 | 
			
		||||
	size_t		result;
 | 
			
		||||
 | 
			
		||||
	if (tolen == 0)
 | 
			
		||||
		return 0;
 | 
			
		||||
 | 
			
		||||
#ifdef WIN32
 | 
			
		||||
	/* See WIN32 "Unicode" comment above */
 | 
			
		||||
	if (GetDatabaseEncoding() == PG_UTF8)
 | 
			
		||||
	{
 | 
			
		||||
		/* Win32 API does not work for zero-length input */
 | 
			
		||||
		if (fromlen == 0)
 | 
			
		||||
			result = 0;
 | 
			
		||||
		else
 | 
			
		||||
		{
 | 
			
		||||
			result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
 | 
			
		||||
			/* A zero return is failure */
 | 
			
		||||
			if (result == 0)
 | 
			
		||||
				result = -1;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if (result != -1)
 | 
			
		||||
		{
 | 
			
		||||
			Assert(result < tolen);
 | 
			
		||||
			/* Append trailing null wchar (MultiByteToWideChar() does not) */
 | 
			
		||||
			to[result] = 0;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	else
 | 
			
		||||
#endif   /* WIN32 */
 | 
			
		||||
	{
 | 
			
		||||
		if (lc_ctype_is_c())
 | 
			
		||||
		{
 | 
			
		||||
			/*
 | 
			
		||||
			 * pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be
 | 
			
		||||
			 * allocated with sufficient space
 | 
			
		||||
			 */
 | 
			
		||||
			result = pg_mb2wchar_with_len(from, (pg_wchar *) to, fromlen);
 | 
			
		||||
		}
 | 
			
		||||
		else
 | 
			
		||||
		{
 | 
			
		||||
			/* mbstowcs requires ending '\0' */
 | 
			
		||||
			char	   *str = pnstrdup(from, fromlen);
 | 
			
		||||
 | 
			
		||||
			result = mbstowcs(to, str, tolen);
 | 
			
		||||
			pfree(str);
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (result == -1)
 | 
			
		||||
	{
 | 
			
		||||
		/*
 | 
			
		||||
		 * Invalid multibyte character encountered.  We try to give a useful
 | 
			
		||||
		 * error message by letting pg_verifymbstr check the string.  But it's
 | 
			
		||||
		 * possible that the string is OK to us, and not OK to mbstowcs ---
 | 
			
		||||
		 * this suggests that the LC_CTYPE locale is different from the
 | 
			
		||||
		 * database encoding.  Give a generic error message if verifymbstr
 | 
			
		||||
		 * can't find anything wrong.
 | 
			
		||||
		 */
 | 
			
		||||
		pg_verifymbstr(from, fromlen, false);	/* might not return */
 | 
			
		||||
		/* but if it does ... */
 | 
			
		||||
		ereport(ERROR,
 | 
			
		||||
				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
 | 
			
		||||
				 errmsg("invalid multibyte character for locale"),
 | 
			
		||||
				 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
 | 
			
		||||
	}	
 | 
			
		||||
 | 
			
		||||
	return result;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/* convert a multibyte string to a wchar */
 | 
			
		||||
int
 | 
			
		||||
pg_mb2wchar(const char *from, pg_wchar *to)
 | 
			
		||||
 | 
			
		||||
@ -14,7 +14,7 @@
 | 
			
		||||
 *
 | 
			
		||||
 *
 | 
			
		||||
 * IDENTIFICATION
 | 
			
		||||
 *	  $PostgreSQL: pgsql/src/backend/utils/mmgr/mcxt.c,v 1.63 2008/01/01 19:45:55 momjian Exp $
 | 
			
		||||
 *	  $PostgreSQL: pgsql/src/backend/utils/mmgr/mcxt.c,v 1.64 2008/06/18 18:42:54 momjian Exp $
 | 
			
		||||
 *
 | 
			
		||||
 *-------------------------------------------------------------------------
 | 
			
		||||
 */
 | 
			
		||||
@ -624,6 +624,18 @@ repalloc(void *pointer, Size size)
 | 
			
		||||
												 pointer, size);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * pnstrdup
 *		Copy exactly len bytes from "in" into palloc'd memory and terminate
 *		the copy with a null byte.
 *
 * The input does not need to be null-terminated.  len is expected to be
 * non-negative; no check is made here.
 */
char *
pnstrdup(const char *in, int len)
{
	char	   *copy;

	copy = palloc(len + 1);		/* one extra byte for the terminator */
	memcpy(copy, in, len);
	copy[len] = '\0';

	return copy;
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * MemoryContextSwitchTo
 | 
			
		||||
 *		Returns the current context; installs the given context.
 | 
			
		||||
 | 
			
		||||
@ -6,7 +6,7 @@
 | 
			
		||||
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 | 
			
		||||
 * Portions Copyright (c) 1994, Regents of the University of California
 | 
			
		||||
 *
 | 
			
		||||
 * $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.78 2008/01/01 19:45:58 momjian Exp $
 | 
			
		||||
 * $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.79 2008/06/18 18:42:54 momjian Exp $
 | 
			
		||||
 *
 | 
			
		||||
 *	NOTES
 | 
			
		||||
 *		This is used both by the backend and by libpq, but should not be
 | 
			
		||||
@ -362,6 +362,11 @@ extern int	pg_mbcharcliplen(const char *mbstr, int len, int imit);
 | 
			
		||||
extern int	pg_encoding_max_length(int encoding);
 | 
			
		||||
extern int	pg_database_encoding_max_length(void);
 | 
			
		||||
 | 
			
		||||
#ifdef USE_WIDE_UPPER_LOWER
 | 
			
		||||
extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen);
 | 
			
		||||
extern size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen);
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
extern void SetDefaultClientEncoding(void);
 | 
			
		||||
extern int	SetClientEncoding(int encoding, bool doit);
 | 
			
		||||
extern void InitializeClientEncoding(void);
 | 
			
		||||
 | 
			
		||||
@ -5,7 +5,7 @@
 | 
			
		||||
 *
 | 
			
		||||
 * Copyright (c) 1998-2008, PostgreSQL Global Development Group
 | 
			
		||||
 *
 | 
			
		||||
 * $PostgreSQL: pgsql/src/include/tsearch/ts_locale.h,v 1.6 2008/06/17 16:09:06 momjian Exp $
 | 
			
		||||
 * $PostgreSQL: pgsql/src/include/tsearch/ts_locale.h,v 1.7 2008/06/18 18:42:54 momjian Exp $
 | 
			
		||||
 *
 | 
			
		||||
 *-------------------------------------------------------------------------
 | 
			
		||||
 */
 | 
			
		||||
@ -33,9 +33,6 @@
 | 
			
		||||
 | 
			
		||||
#ifdef USE_WIDE_UPPER_LOWER
 | 
			
		||||
 | 
			
		||||
extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen);
 | 
			
		||||
extern size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen);
 | 
			
		||||
 | 
			
		||||
extern int	t_isdigit(const char *ptr);
 | 
			
		||||
extern int	t_isspace(const char *ptr);
 | 
			
		||||
extern int	t_isalpha(const char *ptr);
 | 
			
		||||
 | 
			
		||||
@ -6,7 +6,7 @@
 | 
			
		||||
 *
 | 
			
		||||
 * Copyright (c) 1998-2008, PostgreSQL Global Development Group
 | 
			
		||||
 *
 | 
			
		||||
 * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.9 2008/05/16 16:31:02 tgl Exp $
 | 
			
		||||
 * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.10 2008/06/18 18:42:54 momjian Exp $
 | 
			
		||||
 *
 | 
			
		||||
 *-------------------------------------------------------------------------
 | 
			
		||||
 */
 | 
			
		||||
@ -62,8 +62,6 @@ typedef struct
 | 
			
		||||
extern char *get_tsearch_config_filename(const char *basename,
 | 
			
		||||
							const char *extension);
 | 
			
		||||
 | 
			
		||||
extern char *pnstrdup(const char *in, int len);
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Often useful stopword list management
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
@ -21,7 +21,7 @@
 | 
			
		||||
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 | 
			
		||||
 * Portions Copyright (c) 1994, Regents of the University of California
 | 
			
		||||
 *
 | 
			
		||||
 * $PostgreSQL: pgsql/src/include/utils/palloc.h,v 1.38 2008/01/01 19:45:59 momjian Exp $
 | 
			
		||||
 * $PostgreSQL: pgsql/src/include/utils/palloc.h,v 1.39 2008/06/18 18:42:54 momjian Exp $
 | 
			
		||||
 *
 | 
			
		||||
 *-------------------------------------------------------------------------
 | 
			
		||||
 */
 | 
			
		||||
@ -70,6 +70,8 @@ extern void pfree(void *pointer);
 | 
			
		||||
 | 
			
		||||
extern void *repalloc(void *pointer, Size size);
 | 
			
		||||
 | 
			
		||||
extern char *pnstrdup(const char *in, int len);
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * MemoryContextSwitchTo can't be a macro in standard C compilers.
 | 
			
		||||
 * But we can make it an inline function when using GCC.
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user