mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-24 00:03:18 -04:00 
			
		
		
		
	The original coding examined the next character before verifying that there *is* a next character. In the worst case with the input buffer right up against the end of memory, this would result in a segfault. Problem spotted by Paul Guyot; this commit extends his patch to fix an additional case. In addition, make the code a tad more readable by not overloading the usage of *tlen.
		
			
				
	
	
		
			140 lines
		
	
	
		
			2.8 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			140 lines
		
	
	
		
			2.8 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*-------------------------------------------------------------------------
 | |
|  *
 | |
|  * test_parser.c
 | |
|  *	  Simple example of a text search parser
 | |
|  *
 | |
|  * Copyright (c) 2007-2012, PostgreSQL Global Development Group
 | |
|  *
 | |
|  * IDENTIFICATION
 | |
|  *	  contrib/test_parser/test_parser.c
 | |
|  *
 | |
|  *-------------------------------------------------------------------------
 | |
|  */
 | |
| #include "postgres.h"
 | |
| 
 | |
| #include "fmgr.h"
 | |
| 
 | |
| PG_MODULE_MAGIC;
 | |
| 
 | |
| 
 | |
| /*
 | |
|  * types
 | |
|  */
 | |
| 
 | |
/*
 * Per-parse working state for this parser: the text being scanned, its
 * length, and the offset at which the next token starts.
 */
typedef struct ParserState
{
	char	   *buffer;			/* input text being tokenized */
	int			len;			/* number of bytes available in buffer */
	int			pos;			/* current scan offset into buffer */
} ParserState;
 | |
| 
 | |
/*
 * Descriptor for one token type reported by the parser
 * (layout copied from wparser.h of tsearch2).
 */
typedef struct LexDescr
{
	int			lexid;			/* numeric token type id */
	char	   *alias;			/* short name of the token type */
	char	   *descr;			/* human-readable description */
} LexDescr;
 | |
| 
 | |
| /*
 | |
|  * prototypes
 | |
|  */
 | |
| PG_FUNCTION_INFO_V1(testprs_start);
 | |
| Datum		testprs_start(PG_FUNCTION_ARGS);
 | |
| 
 | |
| PG_FUNCTION_INFO_V1(testprs_getlexeme);
 | |
| Datum		testprs_getlexeme(PG_FUNCTION_ARGS);
 | |
| 
 | |
| PG_FUNCTION_INFO_V1(testprs_end);
 | |
| Datum		testprs_end(PG_FUNCTION_ARGS);
 | |
| 
 | |
| PG_FUNCTION_INFO_V1(testprs_lextype);
 | |
| Datum		testprs_lextype(PG_FUNCTION_ARGS);
 | |
| 
 | |
| /*
 | |
|  * functions
 | |
|  */
 | |
| Datum
 | |
| testprs_start(PG_FUNCTION_ARGS)
 | |
| {
 | |
| 	ParserState *pst = (ParserState *) palloc0(sizeof(ParserState));
 | |
| 
 | |
| 	pst->buffer = (char *) PG_GETARG_POINTER(0);
 | |
| 	pst->len = PG_GETARG_INT32(1);
 | |
| 	pst->pos = 0;
 | |
| 
 | |
| 	PG_RETURN_POINTER(pst);
 | |
| }
 | |
| 
 | |
| Datum
 | |
| testprs_getlexeme(PG_FUNCTION_ARGS)
 | |
| {
 | |
| 	ParserState *pst = (ParserState *) PG_GETARG_POINTER(0);
 | |
| 	char	  **t = (char **) PG_GETARG_POINTER(1);
 | |
| 	int		   *tlen = (int *) PG_GETARG_POINTER(2);
 | |
| 	int			startpos = pst->pos;
 | |
| 	int			type;
 | |
| 
 | |
| 	*t = pst->buffer + pst->pos;
 | |
| 
 | |
| 	if (pst->pos < pst->len &&
 | |
| 		(pst->buffer)[pst->pos] == ' ')
 | |
| 	{
 | |
| 		/* blank type */
 | |
| 		type = 12;
 | |
| 		/* go to the next non-space character */
 | |
| 		while (pst->pos < pst->len &&
 | |
| 			   (pst->buffer)[pst->pos] == ' ')
 | |
| 			(pst->pos)++;
 | |
| 	}
 | |
| 	else
 | |
| 	{
 | |
| 		/* word type */
 | |
| 		type = 3;
 | |
| 		/* go to the next space character */
 | |
| 		while (pst->pos < pst->len &&
 | |
| 			   (pst->buffer)[pst->pos] != ' ')
 | |
| 			(pst->pos)++;
 | |
| 	}
 | |
| 
 | |
| 	*tlen = pst->pos - startpos;
 | |
| 
 | |
| 	/* we are finished if (*tlen == 0) */
 | |
| 	if (*tlen == 0)
 | |
| 		type = 0;
 | |
| 
 | |
| 	PG_RETURN_INT32(type);
 | |
| }
 | |
| 
 | |
| Datum
 | |
| testprs_end(PG_FUNCTION_ARGS)
 | |
| {
 | |
| 	ParserState *pst = (ParserState *) PG_GETARG_POINTER(0);
 | |
| 
 | |
| 	pfree(pst);
 | |
| 	PG_RETURN_VOID();
 | |
| }
 | |
| 
 | |
| Datum
 | |
| testprs_lextype(PG_FUNCTION_ARGS)
 | |
| {
 | |
| 	/*
 | |
| 	 * Remarks: - we have to return the blanks for headline reason - we use
 | |
| 	 * the same lexids like Teodor in the default word parser; in this way we
 | |
| 	 * can reuse the headline function of the default word parser.
 | |
| 	 */
 | |
| 	LexDescr   *descr = (LexDescr *) palloc(sizeof(LexDescr) * (2 + 1));
 | |
| 
 | |
| 	/* there are only two types in this parser */
 | |
| 	descr[0].lexid = 3;
 | |
| 	descr[0].alias = pstrdup("word");
 | |
| 	descr[0].descr = pstrdup("Word");
 | |
| 	descr[1].lexid = 12;
 | |
| 	descr[1].alias = pstrdup("blank");
 | |
| 	descr[1].descr = pstrdup("Space symbols");
 | |
| 	descr[2].lexid = 0;
 | |
| 
 | |
| 	PG_RETURN_POINTER(descr);
 | |
| }
 |