mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-03 00:08:23 -05:00 
			
		
		
		
	ways. I'm not totally sure that I caught everything, but at least now they pass their regression tests with VARSIZE/SET_VARSIZE defined to reverse byte order.
		
			
				
	
	
		
			474 lines
		
	
	
		
			8.9 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			474 lines
		
	
	
		
			8.9 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
#include "hstore.h"
 | 
						|
#include <ctype.h>
 | 
						|
 | 
						|
PG_MODULE_MAGIC;
 | 
						|
 | 
						|
typedef struct
 | 
						|
{
 | 
						|
	char	   *begin;
 | 
						|
	char	   *ptr;
 | 
						|
	char	   *cur;
 | 
						|
	char	   *word;
 | 
						|
	int			wordlen;
 | 
						|
 | 
						|
	Pairs	   *pairs;
 | 
						|
	int			pcur;
 | 
						|
	int			plen;
 | 
						|
}	HSParser;
 | 
						|
 | 
						|
/*
 * Make room in the token buffer of the in-scope HSParser pointer "state".
 *
 * When at most one free byte is left after state->cur, the buffer size is
 * doubled.  repalloc may move the buffer, so state->cur is re-derived from
 * the number of bytes already written.
 */
#define RESIZEPRSBUF \
do { \
	if (state->wordlen <= state->cur - state->word + 1) \
	{ \
		int4		nused = state->cur - state->word; \
		\
		state->wordlen *= 2; \
		state->word = (char *) repalloc((void *) state->word, state->wordlen); \
		state->cur = state->word + nused; \
	} \
} while (0)
 | 
						|
 | 
						|
 | 
						|
/*
 * States of the token scanner in get_val().
 */
enum
{
	GV_WAITVAL = 0,				/* before the token: skipping whitespace */
	GV_INVAL = 1,				/* inside an unquoted token */
	GV_INESCVAL = 2,			/* inside a double-quoted token */
	GV_WAITESCIN = 3,			/* backslash seen in an unquoted token; the
								 * next character is taken literally */
	GV_WAITESCESCIN = 4			/* backslash seen in a quoted token; the
								 * next character is taken literally */
};
 | 
						|
 | 
						|
static bool
 | 
						|
get_val(HSParser * state, bool ignoreeq, bool *escaped)
 | 
						|
{
 | 
						|
	int			st = GV_WAITVAL;
 | 
						|
 | 
						|
	state->wordlen = 32;
 | 
						|
	state->cur = state->word = palloc(state->wordlen);
 | 
						|
	*escaped = false;
 | 
						|
 | 
						|
	while (1)
 | 
						|
	{
 | 
						|
		if (st == GV_WAITVAL)
 | 
						|
		{
 | 
						|
			if (*(state->ptr) == '"')
 | 
						|
			{
 | 
						|
				*escaped = true;
 | 
						|
				st = GV_INESCVAL;
 | 
						|
			}
 | 
						|
			else if (*(state->ptr) == '\0')
 | 
						|
			{
 | 
						|
				return false;
 | 
						|
			}
 | 
						|
			else if (*(state->ptr) == '=' && !ignoreeq)
 | 
						|
			{
 | 
						|
				elog(ERROR, "Syntax error near '%c' at postion %d", *(state->ptr), (int4) (state->ptr - state->begin));
 | 
						|
			}
 | 
						|
			else if (*(state->ptr) == '\\')
 | 
						|
			{
 | 
						|
				st = GV_WAITESCIN;
 | 
						|
			}
 | 
						|
			else if (!isspace((unsigned char) *(state->ptr)))
 | 
						|
			{
 | 
						|
				*(state->cur) = *(state->ptr);
 | 
						|
				state->cur++;
 | 
						|
				st = GV_INVAL;
 | 
						|
			}
 | 
						|
		}
 | 
						|
		else if (st == GV_INVAL)
 | 
						|
		{
 | 
						|
			if (*(state->ptr) == '\\')
 | 
						|
			{
 | 
						|
				st = GV_WAITESCIN;
 | 
						|
			}
 | 
						|
			else if (*(state->ptr) == '=' && !ignoreeq)
 | 
						|
			{
 | 
						|
				state->ptr--;
 | 
						|
				return true;
 | 
						|
			}
 | 
						|
			else if (*(state->ptr) == ',' && ignoreeq)
 | 
						|
			{
 | 
						|
				state->ptr--;
 | 
						|
				return true;
 | 
						|
			}
 | 
						|
			else if (isspace((unsigned char) *(state->ptr)))
 | 
						|
			{
 | 
						|
				return true;
 | 
						|
			}
 | 
						|
			else if (*(state->ptr) == '\0')
 | 
						|
			{
 | 
						|
				state->ptr--;
 | 
						|
				return true;
 | 
						|
			}
 | 
						|
			else
 | 
						|
			{
 | 
						|
				RESIZEPRSBUF;
 | 
						|
				*(state->cur) = *(state->ptr);
 | 
						|
				state->cur++;
 | 
						|
			}
 | 
						|
		}
 | 
						|
		else if (st == GV_INESCVAL)
 | 
						|
		{
 | 
						|
			if (*(state->ptr) == '\\')
 | 
						|
			{
 | 
						|
				st = GV_WAITESCESCIN;
 | 
						|
			}
 | 
						|
			else if (*(state->ptr) == '"')
 | 
						|
			{
 | 
						|
				return true;
 | 
						|
			}
 | 
						|
			else if (*(state->ptr) == '\0')
 | 
						|
			{
 | 
						|
				elog(ERROR, "Unexpected end of string");
 | 
						|
			}
 | 
						|
			else
 | 
						|
			{
 | 
						|
				RESIZEPRSBUF;
 | 
						|
				*(state->cur) = *(state->ptr);
 | 
						|
				state->cur++;
 | 
						|
			}
 | 
						|
		}
 | 
						|
		else if (st == GV_WAITESCIN)
 | 
						|
		{
 | 
						|
			if (*(state->ptr) == '\0')
 | 
						|
				elog(ERROR, "Unexpected end of string");
 | 
						|
			RESIZEPRSBUF;
 | 
						|
			*(state->cur) = *(state->ptr);
 | 
						|
			state->cur++;
 | 
						|
			st = GV_INVAL;
 | 
						|
		}
 | 
						|
		else if (st == GV_WAITESCESCIN)
 | 
						|
		{
 | 
						|
			if (*(state->ptr) == '\0')
 | 
						|
				elog(ERROR, "Unexpected end of string");
 | 
						|
			RESIZEPRSBUF;
 | 
						|
			*(state->cur) = *(state->ptr);
 | 
						|
			state->cur++;
 | 
						|
			st = GV_INESCVAL;
 | 
						|
		}
 | 
						|
		else
 | 
						|
			elog(ERROR, "Unknown state %d at position line %d in file '%s'", st, __LINE__, __FILE__);
 | 
						|
 | 
						|
		state->ptr++;
 | 
						|
	}
 | 
						|
 | 
						|
	return false;
 | 
						|
}
 | 
						|
 | 
						|
/*
 * States of the pair-level parser in parse_hstore().
 */
enum
{
	WKEY = 0,					/* expecting a key */
	WVAL = 1,					/* expecting a value */
	WEQ = 2,					/* expecting '=' (whitespace is skipped) */
	WGT = 3,					/* expecting '>' right after '=' */
	WDEL = 4					/* expecting ',' or the end of the input */
};
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
parse_hstore(HSParser * state)
 | 
						|
{
 | 
						|
	int			st = WKEY;
 | 
						|
	bool		escaped = false;
 | 
						|
 | 
						|
	state->plen = 16;
 | 
						|
	state->pairs = (Pairs *) palloc(sizeof(Pairs) * state->plen);
 | 
						|
	state->pcur = 0;
 | 
						|
	state->ptr = state->begin;
 | 
						|
	state->word = NULL;
 | 
						|
 | 
						|
	while (1)
 | 
						|
	{
 | 
						|
		if (st == WKEY)
 | 
						|
		{
 | 
						|
			if (!get_val(state, false, &escaped))
 | 
						|
				return;
 | 
						|
			if (state->pcur >= state->plen)
 | 
						|
			{
 | 
						|
				state->plen *= 2;
 | 
						|
				state->pairs = (Pairs *) repalloc(state->pairs, sizeof(Pairs) * state->plen);
 | 
						|
			}
 | 
						|
			state->pairs[state->pcur].key = state->word;
 | 
						|
			state->pairs[state->pcur].keylen = state->cur - state->word;
 | 
						|
			state->pairs[state->pcur].val = NULL;
 | 
						|
			state->word = NULL;
 | 
						|
			st = WEQ;
 | 
						|
		}
 | 
						|
		else if (st == WEQ)
 | 
						|
		{
 | 
						|
			if (*(state->ptr) == '=')
 | 
						|
			{
 | 
						|
				st = WGT;
 | 
						|
			}
 | 
						|
			else if (*(state->ptr) == '\0')
 | 
						|
			{
 | 
						|
				elog(ERROR, "Unexpected end of string");
 | 
						|
			}
 | 
						|
			else if (!isspace((unsigned char) *(state->ptr)))
 | 
						|
			{
 | 
						|
				elog(ERROR, "Syntax error near '%c' at position %d", *(state->ptr), (int4) (state->ptr - state->begin));
 | 
						|
			}
 | 
						|
		}
 | 
						|
		else if (st == WGT)
 | 
						|
		{
 | 
						|
			if (*(state->ptr) == '>')
 | 
						|
			{
 | 
						|
				st = WVAL;
 | 
						|
			}
 | 
						|
			else if (*(state->ptr) == '\0')
 | 
						|
			{
 | 
						|
				elog(ERROR, "Unexpected end of string");
 | 
						|
			}
 | 
						|
			else
 | 
						|
			{
 | 
						|
				elog(ERROR, "Syntax error near '%c' at position %d", *(state->ptr), (int4) (state->ptr - state->begin));
 | 
						|
			}
 | 
						|
		}
 | 
						|
		else if (st == WVAL)
 | 
						|
		{
 | 
						|
			if (!get_val(state, true, &escaped))
 | 
						|
				elog(ERROR, "Unexpected end of string");
 | 
						|
			state->pairs[state->pcur].val = state->word;
 | 
						|
			state->pairs[state->pcur].vallen = state->cur - state->word;
 | 
						|
			state->pairs[state->pcur].isnull = false;
 | 
						|
			state->pairs[state->pcur].needfree = true;
 | 
						|
			if (state->cur - state->word == 4 && !escaped)
 | 
						|
			{
 | 
						|
				state->word[4] = '\0';
 | 
						|
				if (0 == pg_strcasecmp(state->word, "null"))
 | 
						|
					state->pairs[state->pcur].isnull = true;
 | 
						|
			}
 | 
						|
			state->word = NULL;
 | 
						|
			state->pcur++;
 | 
						|
			st = WDEL;
 | 
						|
		}
 | 
						|
		else if (st == WDEL)
 | 
						|
		{
 | 
						|
			if (*(state->ptr) == ',')
 | 
						|
			{
 | 
						|
				st = WKEY;
 | 
						|
			}
 | 
						|
			else if (*(state->ptr) == '\0')
 | 
						|
			{
 | 
						|
				return;
 | 
						|
			}
 | 
						|
			else if (!isspace((unsigned char) *(state->ptr)))
 | 
						|
			{
 | 
						|
				elog(ERROR, "Syntax error near '%c' at position %d", *(state->ptr), (int4) (state->ptr - state->begin));
 | 
						|
			}
 | 
						|
		}
 | 
						|
		else
 | 
						|
			elog(ERROR, "Unknown state %d at line %d in file '%s'", st, __LINE__, __FILE__);
 | 
						|
 | 
						|
		state->ptr++;
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
int
 | 
						|
comparePairs(const void *a, const void *b)
 | 
						|
{
 | 
						|
	if (((Pairs *) a)->keylen == ((Pairs *) b)->keylen)
 | 
						|
	{
 | 
						|
		int			res = strncmp(
 | 
						|
								  ((Pairs *) a)->key,
 | 
						|
								  ((Pairs *) b)->key,
 | 
						|
								  ((Pairs *) a)->keylen
 | 
						|
		);
 | 
						|
 | 
						|
		if (res)
 | 
						|
			return res;
 | 
						|
 | 
						|
		/* guarantee that needfree will be later */
 | 
						|
		if (((Pairs *) b)->needfree == ((Pairs *) a)->needfree)
 | 
						|
			return 0;
 | 
						|
		else if (((Pairs *) a)->needfree)
 | 
						|
			return 1;
 | 
						|
		else
 | 
						|
			return -1;
 | 
						|
	}
 | 
						|
	return (((Pairs *) a)->keylen > ((Pairs *) b)->keylen) ? 1 : -1;
 | 
						|
}
 | 
						|
 | 
						|
int
 | 
						|
uniquePairs(Pairs * a, int4 l, int4 *buflen)
 | 
						|
{
 | 
						|
	Pairs	   *ptr,
 | 
						|
			   *res;
 | 
						|
 | 
						|
	*buflen = 0;
 | 
						|
	if (l < 2)
 | 
						|
	{
 | 
						|
		if (l == 1)
 | 
						|
			*buflen = a->keylen + ((a->isnull) ? 0 : a->vallen);
 | 
						|
		return l;
 | 
						|
	}
 | 
						|
 | 
						|
	qsort((void *) a, l, sizeof(Pairs), comparePairs);
 | 
						|
	ptr = a + 1;
 | 
						|
	res = a;
 | 
						|
	while (ptr - a < l)
 | 
						|
	{
 | 
						|
		if (ptr->keylen == res->keylen && strncmp(ptr->key, res->key, res->keylen) == 0)
 | 
						|
		{
 | 
						|
			if (ptr->needfree)
 | 
						|
			{
 | 
						|
				pfree(ptr->key);
 | 
						|
				pfree(ptr->val);
 | 
						|
			}
 | 
						|
		}
 | 
						|
		else
 | 
						|
		{
 | 
						|
			*buflen += res->keylen + ((res->isnull) ? 0 : res->vallen);
 | 
						|
			res++;
 | 
						|
			memcpy(res, ptr, sizeof(Pairs));
 | 
						|
		}
 | 
						|
 | 
						|
		ptr++;
 | 
						|
	}
 | 
						|
 | 
						|
	*buflen += res->keylen + ((res->isnull) ? 0 : res->vallen);
 | 
						|
	return res + 1 - a;
 | 
						|
}
 | 
						|
 | 
						|
static void
 | 
						|
freeHSParse(HSParser * state)
 | 
						|
{
 | 
						|
	int			i;
 | 
						|
 | 
						|
	if (state->word)
 | 
						|
		pfree(state->word);
 | 
						|
	for (i = 0; i < state->pcur; i++)
 | 
						|
		if (state->pairs[i].needfree)
 | 
						|
		{
 | 
						|
			if (state->pairs[i].key)
 | 
						|
				pfree(state->pairs[i].key);
 | 
						|
			if (state->pairs[i].val)
 | 
						|
				pfree(state->pairs[i].val);
 | 
						|
		}
 | 
						|
	pfree(state->pairs);
 | 
						|
}
 | 
						|
 | 
						|
PG_FUNCTION_INFO_V1(hstore_in);
 | 
						|
Datum		hstore_in(PG_FUNCTION_ARGS);
 | 
						|
Datum
 | 
						|
hstore_in(PG_FUNCTION_ARGS)
 | 
						|
{
 | 
						|
	HSParser	state;
 | 
						|
	int4		len,
 | 
						|
				buflen,
 | 
						|
				i;
 | 
						|
	HStore	   *out;
 | 
						|
	HEntry	   *entries;
 | 
						|
	char	   *ptr;
 | 
						|
 | 
						|
	state.begin = PG_GETARG_CSTRING(0);
 | 
						|
 | 
						|
	parse_hstore(&state);
 | 
						|
 | 
						|
	if (state.pcur == 0)
 | 
						|
	{
 | 
						|
		freeHSParse(&state);
 | 
						|
		len = CALCDATASIZE(0, 0);
 | 
						|
		out = palloc(len);
 | 
						|
		SET_VARSIZE(out, len);
 | 
						|
		out->size = 0;
 | 
						|
		PG_RETURN_POINTER(out);
 | 
						|
	}
 | 
						|
 | 
						|
	state.pcur = uniquePairs(state.pairs, state.pcur, &buflen);
 | 
						|
 | 
						|
	len = CALCDATASIZE(state.pcur, buflen);
 | 
						|
	out = palloc(len);
 | 
						|
	SET_VARSIZE(out, len);
 | 
						|
	out->size = state.pcur;
 | 
						|
 | 
						|
	entries = ARRPTR(out);
 | 
						|
	ptr = STRPTR(out);
 | 
						|
 | 
						|
	for (i = 0; i < out->size; i++)
 | 
						|
	{
 | 
						|
		entries[i].keylen = state.pairs[i].keylen;
 | 
						|
		entries[i].pos = ptr - STRPTR(out);
 | 
						|
		memcpy(ptr, state.pairs[i].key, state.pairs[i].keylen);
 | 
						|
		ptr += entries[i].keylen;
 | 
						|
 | 
						|
		entries[i].valisnull = state.pairs[i].isnull;
 | 
						|
		if (entries[i].valisnull)
 | 
						|
			entries[i].vallen = 4;		/* null */
 | 
						|
		else
 | 
						|
		{
 | 
						|
			entries[i].vallen = state.pairs[i].vallen;
 | 
						|
			memcpy(ptr, state.pairs[i].val, state.pairs[i].vallen);
 | 
						|
			ptr += entries[i].vallen;
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	freeHSParse(&state);
 | 
						|
	PG_RETURN_POINTER(out);
 | 
						|
}
 | 
						|
 | 
						|
/*
 * cpw
 *		Copy len bytes from src to dst, putting a backslash in front of
 *		every double quote and backslash.
 *
 * Returns the position in dst just past the last byte written.  Nothing is
 * NUL-terminated; dst must have room for up to 2 * len bytes.
 */
static char *
cpw(char *dst, char *src, int len)
{
	int			i;

	for (i = 0; i < len; i++)
	{
		char		ch = src[i];

		if (ch == '\\' || ch == '"')
			*dst++ = '\\';
		*dst++ = ch;
	}

	return dst;
}
 | 
						|
 | 
						|
PG_FUNCTION_INFO_V1(hstore_out);
 | 
						|
Datum		hstore_out(PG_FUNCTION_ARGS);
 | 
						|
Datum
 | 
						|
hstore_out(PG_FUNCTION_ARGS)
 | 
						|
{
 | 
						|
	HStore	   *in = PG_GETARG_HS(0);
 | 
						|
	int			buflen,
 | 
						|
				i;
 | 
						|
	char	   *out,
 | 
						|
			   *ptr;
 | 
						|
	char	   *base = STRPTR(in);
 | 
						|
	HEntry	   *entries = ARRPTR(in);
 | 
						|
 | 
						|
	if (in->size == 0)
 | 
						|
	{
 | 
						|
		out = palloc(1);
 | 
						|
		*out = '\0';
 | 
						|
		PG_FREE_IF_COPY(in, 0);
 | 
						|
		PG_RETURN_CSTRING(out);
 | 
						|
	}
 | 
						|
 | 
						|
	buflen = (4 /* " */ + 2 /* => */ + 2 /* , */ ) * in->size +
 | 
						|
		2 /* esc */ * (VARSIZE(in) - CALCDATASIZE(in->size, 0));
 | 
						|
 | 
						|
	out = ptr = palloc(buflen);
 | 
						|
	for (i = 0; i < in->size; i++)
 | 
						|
	{
 | 
						|
		*ptr++ = '"';
 | 
						|
		ptr = cpw(ptr, base + entries[i].pos, entries[i].keylen);
 | 
						|
		*ptr++ = '"';
 | 
						|
		*ptr++ = '=';
 | 
						|
		*ptr++ = '>';
 | 
						|
		if (entries[i].valisnull)
 | 
						|
		{
 | 
						|
			*ptr++ = 'N';
 | 
						|
			*ptr++ = 'U';
 | 
						|
			*ptr++ = 'L';
 | 
						|
			*ptr++ = 'L';
 | 
						|
		}
 | 
						|
		else
 | 
						|
		{
 | 
						|
			*ptr++ = '"';
 | 
						|
			ptr = cpw(ptr, base + entries[i].pos + entries[i].keylen, entries[i].vallen);
 | 
						|
			*ptr++ = '"';
 | 
						|
		}
 | 
						|
 | 
						|
		if (i + 1 != in->size)
 | 
						|
		{
 | 
						|
			*ptr++ = ',';
 | 
						|
			*ptr++ = ' ';
 | 
						|
		}
 | 
						|
	}
 | 
						|
	*ptr = '\0';
 | 
						|
 | 
						|
	PG_FREE_IF_COPY(in, 0);
 | 
						|
	PG_RETURN_CSTRING(out);
 | 
						|
}
 |