mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-31 00:03:57 -04:00 
			
		
		
		
	Reported-by: Michael Paquier Discussion: https://postgr.es/m/ZZKTDPxBBMt3C0J9@paquier.xyz Backpatch-through: 12
		
			
				
	
	
		
			260 lines
		
	
	
		
			5.0 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			260 lines
		
	
	
		
			5.0 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*-------------------------------------------------------------------------
 | |
|  *
 | |
|  * dict_xsyn.c
 | |
|  *	  Extended synonym dictionary
 | |
|  *
 | |
|  * Copyright (c) 2007-2024, PostgreSQL Global Development Group
 | |
|  *
 | |
|  * IDENTIFICATION
 | |
|  *	  contrib/dict_xsyn/dict_xsyn.c
 | |
|  *
 | |
|  *-------------------------------------------------------------------------
 | |
|  */
 | |
| #include "postgres.h"
 | |
| 
 | |
| #include <ctype.h>
 | |
| 
 | |
| #include "commands/defrem.h"
 | |
| #include "tsearch/ts_locale.h"
 | |
| #include "tsearch/ts_utils.h"
 | |
| 
 | |
| PG_MODULE_MAGIC;
 | |
| 
 | |
| typedef struct
 | |
| {
 | |
| 	char	   *key;			/* Word */
 | |
| 	char	   *value;			/* Unparsed list of synonyms, including the
 | |
| 								 * word itself */
 | |
| } Syn;
 | |
| 
 | |
| typedef struct
 | |
| {
 | |
| 	int			len;
 | |
| 	Syn		   *syn;
 | |
| 
 | |
| 	bool		matchorig;
 | |
| 	bool		keeporig;
 | |
| 	bool		matchsynonyms;
 | |
| 	bool		keepsynonyms;
 | |
| } DictSyn;
 | |
| 
 | |
| 
 | |
| PG_FUNCTION_INFO_V1(dxsyn_init);
 | |
| PG_FUNCTION_INFO_V1(dxsyn_lexize);
 | |
| 
 | |
| static char *
 | |
| find_word(char *in, char **end)
 | |
| {
 | |
| 	char	   *start;
 | |
| 
 | |
| 	*end = NULL;
 | |
| 	while (*in && t_isspace(in))
 | |
| 		in += pg_mblen(in);
 | |
| 
 | |
| 	if (!*in || *in == '#')
 | |
| 		return NULL;
 | |
| 	start = in;
 | |
| 
 | |
| 	while (*in && !t_isspace(in))
 | |
| 		in += pg_mblen(in);
 | |
| 
 | |
| 	*end = in;
 | |
| 
 | |
| 	return start;
 | |
| }
 | |
| 
 | |
| static int
 | |
| compare_syn(const void *a, const void *b)
 | |
| {
 | |
| 	return strcmp(((const Syn *) a)->key, ((const Syn *) b)->key);
 | |
| }
 | |
| 
 | |
| static void
 | |
| read_dictionary(DictSyn *d, const char *filename)
 | |
| {
 | |
| 	char	   *real_filename = get_tsearch_config_filename(filename, "rules");
 | |
| 	tsearch_readline_state trst;
 | |
| 	char	   *line;
 | |
| 	int			cur = 0;
 | |
| 
 | |
| 	if (!tsearch_readline_begin(&trst, real_filename))
 | |
| 		ereport(ERROR,
 | |
| 				(errcode(ERRCODE_CONFIG_FILE_ERROR),
 | |
| 				 errmsg("could not open synonym file \"%s\": %m",
 | |
| 						real_filename)));
 | |
| 
 | |
| 	while ((line = tsearch_readline(&trst)) != NULL)
 | |
| 	{
 | |
| 		char	   *value;
 | |
| 		char	   *key;
 | |
| 		char	   *pos;
 | |
| 		char	   *end;
 | |
| 
 | |
| 		if (*line == '\0')
 | |
| 			continue;
 | |
| 
 | |
| 		value = lowerstr(line);
 | |
| 		pfree(line);
 | |
| 
 | |
| 		pos = value;
 | |
| 		while ((key = find_word(pos, &end)) != NULL)
 | |
| 		{
 | |
| 			/* Enlarge syn structure if full */
 | |
| 			if (cur == d->len)
 | |
| 			{
 | |
| 				d->len = (d->len > 0) ? 2 * d->len : 16;
 | |
| 				if (d->syn)
 | |
| 					d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
 | |
| 				else
 | |
| 					d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
 | |
| 			}
 | |
| 
 | |
| 			/* Save first word only if we will match it */
 | |
| 			if (pos != value || d->matchorig)
 | |
| 			{
 | |
| 				d->syn[cur].key = pnstrdup(key, end - key);
 | |
| 				d->syn[cur].value = pstrdup(value);
 | |
| 
 | |
| 				cur++;
 | |
| 			}
 | |
| 
 | |
| 			pos = end;
 | |
| 
 | |
| 			/* Don't bother scanning synonyms if we will not match them */
 | |
| 			if (!d->matchsynonyms)
 | |
| 				break;
 | |
| 		}
 | |
| 
 | |
| 		pfree(value);
 | |
| 	}
 | |
| 
 | |
| 	tsearch_readline_end(&trst);
 | |
| 
 | |
| 	d->len = cur;
 | |
| 	if (cur > 1)
 | |
| 		qsort(d->syn, d->len, sizeof(Syn), compare_syn);
 | |
| 
 | |
| 	pfree(real_filename);
 | |
| }
 | |
| 
 | |
| Datum
 | |
| dxsyn_init(PG_FUNCTION_ARGS)
 | |
| {
 | |
| 	List	   *dictoptions = (List *) PG_GETARG_POINTER(0);
 | |
| 	DictSyn    *d;
 | |
| 	ListCell   *l;
 | |
| 	char	   *filename = NULL;
 | |
| 
 | |
| 	d = (DictSyn *) palloc0(sizeof(DictSyn));
 | |
| 	d->len = 0;
 | |
| 	d->syn = NULL;
 | |
| 	d->matchorig = true;
 | |
| 	d->keeporig = true;
 | |
| 	d->matchsynonyms = false;
 | |
| 	d->keepsynonyms = true;
 | |
| 
 | |
| 	foreach(l, dictoptions)
 | |
| 	{
 | |
| 		DefElem    *defel = (DefElem *) lfirst(l);
 | |
| 
 | |
| 		if (strcmp(defel->defname, "matchorig") == 0)
 | |
| 		{
 | |
| 			d->matchorig = defGetBoolean(defel);
 | |
| 		}
 | |
| 		else if (strcmp(defel->defname, "keeporig") == 0)
 | |
| 		{
 | |
| 			d->keeporig = defGetBoolean(defel);
 | |
| 		}
 | |
| 		else if (strcmp(defel->defname, "matchsynonyms") == 0)
 | |
| 		{
 | |
| 			d->matchsynonyms = defGetBoolean(defel);
 | |
| 		}
 | |
| 		else if (strcmp(defel->defname, "keepsynonyms") == 0)
 | |
| 		{
 | |
| 			d->keepsynonyms = defGetBoolean(defel);
 | |
| 		}
 | |
| 		else if (strcmp(defel->defname, "rules") == 0)
 | |
| 		{
 | |
| 			/* we can't read the rules before parsing all options! */
 | |
| 			filename = defGetString(defel);
 | |
| 		}
 | |
| 		else
 | |
| 		{
 | |
| 			ereport(ERROR,
 | |
| 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 | |
| 					 errmsg("unrecognized xsyn parameter: \"%s\"",
 | |
| 							defel->defname)));
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if (filename)
 | |
| 		read_dictionary(d, filename);
 | |
| 
 | |
| 	PG_RETURN_POINTER(d);
 | |
| }
 | |
| 
 | |
| Datum
 | |
| dxsyn_lexize(PG_FUNCTION_ARGS)
 | |
| {
 | |
| 	DictSyn    *d = (DictSyn *) PG_GETARG_POINTER(0);
 | |
| 	char	   *in = (char *) PG_GETARG_POINTER(1);
 | |
| 	int			length = PG_GETARG_INT32(2);
 | |
| 	Syn			word;
 | |
| 	Syn		   *found;
 | |
| 	TSLexeme   *res = NULL;
 | |
| 
 | |
| 	if (!length || d->len == 0)
 | |
| 		PG_RETURN_POINTER(NULL);
 | |
| 
 | |
| 	/* Create search pattern */
 | |
| 	{
 | |
| 		char	   *temp = pnstrdup(in, length);
 | |
| 
 | |
| 		word.key = lowerstr(temp);
 | |
| 		pfree(temp);
 | |
| 		word.value = NULL;
 | |
| 	}
 | |
| 
 | |
| 	/* Look for matching syn */
 | |
| 	found = (Syn *) bsearch(&word, d->syn, d->len, sizeof(Syn), compare_syn);
 | |
| 	pfree(word.key);
 | |
| 
 | |
| 	if (!found)
 | |
| 		PG_RETURN_POINTER(NULL);
 | |
| 
 | |
| 	/* Parse string of synonyms and return array of words */
 | |
| 	{
 | |
| 		char	   *value = found->value;
 | |
| 		char	   *syn;
 | |
| 		char	   *pos;
 | |
| 		char	   *end;
 | |
| 		int			nsyns = 0;
 | |
| 
 | |
| 		res = palloc(sizeof(TSLexeme));
 | |
| 
 | |
| 		pos = value;
 | |
| 		while ((syn = find_word(pos, &end)) != NULL)
 | |
| 		{
 | |
| 			res = repalloc(res, sizeof(TSLexeme) * (nsyns + 2));
 | |
| 
 | |
| 			/* The first word is output only if keeporig=true */
 | |
| 			if (pos != value || d->keeporig)
 | |
| 			{
 | |
| 				res[nsyns].lexeme = pnstrdup(syn, end - syn);
 | |
| 				res[nsyns].nvariant = 0;
 | |
| 				res[nsyns].flags = 0;
 | |
| 				nsyns++;
 | |
| 			}
 | |
| 
 | |
| 			pos = end;
 | |
| 
 | |
| 			/* Stop if we are not to output the synonyms */
 | |
| 			if (!d->keepsynonyms)
 | |
| 				break;
 | |
| 		}
 | |
| 		res[nsyns].lexeme = NULL;
 | |
| 	}
 | |
| 
 | |
| 	PG_RETURN_POINTER(res);
 | |
| }
 |