mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-04 00:02:52 -05:00 
			
		
		
		
	Add GIN support for pg_trgm. From Guillaume Smet <guillaume.smet@gmail.com>
with minor editorization by me.
This commit is contained in:
		
							parent
							
								
									547e41cdf8
								
							
						
					
					
						commit
						15f91f2789
					
				@ -1,7 +1,7 @@
 | 
			
		||||
# $PostgreSQL: pgsql/contrib/pg_trgm/Makefile,v 1.6 2007/02/09 17:24:33 petere Exp $
 | 
			
		||||
# $PostgreSQL: pgsql/contrib/pg_trgm/Makefile,v 1.7 2007/03/14 14:15:40 teodor Exp $
 | 
			
		||||
 | 
			
		||||
MODULE_big = pg_trgm
 | 
			
		||||
OBJS = trgm_op.o trgm_gist.o 
 | 
			
		||||
OBJS = trgm_op.o trgm_gist.o trgm_gin.o
 | 
			
		||||
 | 
			
		||||
DATA_built = pg_trgm.sql
 | 
			
		||||
DATA = uninstall_pg_trgm.sql
 | 
			
		||||
 | 
			
		||||
@ -113,6 +113,8 @@ Tsearch2 Integration
 | 
			
		||||
	Next, create a trigram index on the word column:
 | 
			
		||||
 | 
			
		||||
	CREATE INDEX words_idx ON words USING gist(word gist_trgm_ops);
 | 
			
		||||
	or
 | 
			
		||||
	CREATE INDEX words_idx ON words USING gin(word gin_trgm_ops);
 | 
			
		||||
 | 
			
		||||
	Now, a SELECT query similar to the example above can be used to
 | 
			
		||||
	suggest spellings for misspelled words in user search terms. A
 | 
			
		||||
 | 
			
		||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@ -36,7 +36,7 @@ CREATE OPERATOR % (
 | 
			
		||||
        JOIN = contjoinsel
 | 
			
		||||
);
 | 
			
		||||
 | 
			
		||||
--gist key
 | 
			
		||||
-- gist key
 | 
			
		||||
CREATE FUNCTION gtrgm_in(cstring)
 | 
			
		||||
RETURNS gtrgm
 | 
			
		||||
AS 'MODULE_PATHNAME'
 | 
			
		||||
@ -53,7 +53,7 @@ CREATE TYPE gtrgm (
 | 
			
		||||
        OUTPUT = gtrgm_out
 | 
			
		||||
);
 | 
			
		||||
 | 
			
		||||
-- support functions
 | 
			
		||||
-- support functions for gist
 | 
			
		||||
CREATE FUNCTION gtrgm_consistent(gtrgm,internal,int4)
 | 
			
		||||
RETURNS bool
 | 
			
		||||
AS 'MODULE_PATHNAME'
 | 
			
		||||
@ -89,7 +89,7 @@ RETURNS internal
 | 
			
		||||
AS 'MODULE_PATHNAME'
 | 
			
		||||
LANGUAGE C;
 | 
			
		||||
 | 
			
		||||
-- create the operator class
 | 
			
		||||
-- create the operator class for gist
 | 
			
		||||
CREATE OPERATOR CLASS gist_trgm_ops
 | 
			
		||||
FOR TYPE text USING gist
 | 
			
		||||
AS
 | 
			
		||||
@ -103,5 +103,31 @@ AS
 | 
			
		||||
        FUNCTION        7       gtrgm_same (gtrgm, gtrgm, internal),
 | 
			
		||||
        STORAGE         gtrgm;
 | 
			
		||||
 | 
			
		||||
-- support functions for gin
 | 
			
		||||
CREATE FUNCTION gin_extract_trgm(text, internal)
 | 
			
		||||
RETURNS internal
 | 
			
		||||
AS 'MODULE_PATHNAME'
 | 
			
		||||
LANGUAGE C;
 | 
			
		||||
 | 
			
		||||
CREATE FUNCTION gin_extract_trgm(text, internal, internal)
 | 
			
		||||
RETURNS internal
 | 
			
		||||
AS 'MODULE_PATHNAME'
 | 
			
		||||
LANGUAGE C;
 | 
			
		||||
 | 
			
		||||
-- GIN "consistent" support function used by gin_trgm_ops.
-- The C implementation finishes with PG_RETURN_BOOL, so the SQL-level
-- result type is declared as bool (not internal) to match what the
-- function actually returns.  The argument list is unchanged, so the
-- operator class definition and the uninstall script still resolve it.
CREATE FUNCTION gin_trgm_consistent(internal, internal, text)
RETURNS bool
AS 'MODULE_PATHNAME'
LANGUAGE C;
 | 
			
		||||
 | 
			
		||||
-- create the operator class for gin
 | 
			
		||||
CREATE OPERATOR CLASS gin_trgm_ops
    FOR TYPE text USING gin
AS
    -- strategy 1: the similarity operator; RECHECK asks for index matches
    -- to be re-evaluated against the actual row
    OPERATOR 1 % (text, text) RECHECK,
    -- support 1: ordering of the stored int4 keys
    FUNCTION 1 btint4cmp (int4, int4),
    -- support 2: key extraction from an indexed text value
    FUNCTION 2 gin_extract_trgm (text, internal),
    -- support 3: key extraction from the query text
    FUNCTION 3 gin_extract_trgm (text, internal, internal),
    -- support 4: decide whether the matched keys satisfy the query
    FUNCTION 4 gin_trgm_consistent (internal, internal, text),
    -- each trigram is stored in the index as an int4
    STORAGE int4;
 | 
			
		||||
 | 
			
		||||
COMMIT;
 | 
			
		||||
 | 
			
		||||
@ -28,3 +28,11 @@ select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu098
 | 
			
		||||
select t,similarity(t,'gwertyu0988') as sml from test_trgm where t % 'gwertyu0988' order by sml desc, t;
 | 
			
		||||
select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu1988' order by sml desc, t;
 | 
			
		||||
 | 
			
		||||
drop index trgm_idx;
 | 
			
		||||
create index trgm_idx on test_trgm using gin (t gin_trgm_ops);
 | 
			
		||||
set enable_seqscan=off;
 | 
			
		||||
 | 
			
		||||
select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu0988' order by sml desc, t;
 | 
			
		||||
select t,similarity(t,'gwertyu0988') as sml from test_trgm where t % 'gwertyu0988' order by sml desc, t;
 | 
			
		||||
select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu1988' order by sml desc, t;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -28,6 +28,7 @@ typedef char trgm[3];
 | 
			
		||||
	*(((char*)(a))+2) = *(((char*)(b))+2);	\
 | 
			
		||||
} while(0);
 | 
			
		||||
 | 
			
		||||
#define TRGMINT(a) ( (*(((char*)(a))+2)<<16)+(*(((char*)(a))+1)<<8)+*(((char*)(a))+0) )
 | 
			
		||||
 | 
			
		||||
typedef struct
 | 
			
		||||
{
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										77
									
								
								contrib/pg_trgm/trgm_gin.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										77
									
								
								contrib/pg_trgm/trgm_gin.c
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,77 @@
 | 
			
		||||
#include "trgm.h"
 | 
			
		||||
 | 
			
		||||
#include "access/gin.h"
 | 
			
		||||
#include "access/itup.h"
 | 
			
		||||
#include "access/tuptoaster.h"
 | 
			
		||||
#include "storage/bufpage.h"
 | 
			
		||||
#include "utils/array.h"
 | 
			
		||||
#include "utils/builtins.h"
 | 
			
		||||
 | 
			
		||||
PG_FUNCTION_INFO_V1(gin_extract_trgm);
 | 
			
		||||
Datum		gin_extract_trgm(PG_FUNCTION_ARGS);
 | 
			
		||||
 | 
			
		||||
PG_FUNCTION_INFO_V1(gin_trgm_consistent);
 | 
			
		||||
Datum		gin_trgm_consistent(PG_FUNCTION_ARGS);
 | 
			
		||||
 | 
			
		||||
Datum
 | 
			
		||||
gin_extract_trgm(PG_FUNCTION_ARGS)
 | 
			
		||||
{
 | 
			
		||||
	text		*val = (text *) PG_GETARG_TEXT_P(0);
 | 
			
		||||
	int32		*nentries = (int32 *) PG_GETARG_POINTER(1);
 | 
			
		||||
	Datum		*entries = NULL;
 | 
			
		||||
	TRGM		*trg;
 | 
			
		||||
	int4		trglen;
 | 
			
		||||
	
 | 
			
		||||
	*nentries = 0;
 | 
			
		||||
	
 | 
			
		||||
	trg = generate_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
 | 
			
		||||
	trglen = ARRNELEM(trg);
 | 
			
		||||
	
 | 
			
		||||
	if (trglen > 0)
 | 
			
		||||
	{
 | 
			
		||||
		trgm	*ptr;
 | 
			
		||||
		int4	i = 0,
 | 
			
		||||
				item;
 | 
			
		||||
		
 | 
			
		||||
		*nentries = (int32) trglen;
 | 
			
		||||
		entries = (Datum *) palloc(sizeof(Datum) * trglen);
 | 
			
		||||
 | 
			
		||||
		ptr = GETARR(trg);
 | 
			
		||||
		while (ptr - GETARR(trg) < ARRNELEM(trg))
 | 
			
		||||
		{
 | 
			
		||||
			item = TRGMINT(ptr);
 | 
			
		||||
			entries[i++] = Int32GetDatum(item);
 | 
			
		||||
			
 | 
			
		||||
			ptr++;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	PG_RETURN_POINTER(entries);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
Datum
 | 
			
		||||
gin_trgm_consistent(PG_FUNCTION_ARGS)
 | 
			
		||||
{
 | 
			
		||||
	bool		*check = (bool *) PG_GETARG_POINTER(0);
 | 
			
		||||
	text		*query = (text *) PG_GETARG_TEXT_P(2);
 | 
			
		||||
	bool		res = FALSE;
 | 
			
		||||
	TRGM		*trg;
 | 
			
		||||
	int4		i,
 | 
			
		||||
				trglen,
 | 
			
		||||
				ntrue = 0;
 | 
			
		||||
	
 | 
			
		||||
	trg = generate_trgm(VARDATA(query), VARSIZE(query) - VARHDRSZ);
 | 
			
		||||
	trglen = ARRNELEM(trg);
 | 
			
		||||
	
 | 
			
		||||
	for (i = 0; i < trglen; i++)
 | 
			
		||||
		if (check[i])
 | 
			
		||||
			ntrue ++;
 | 
			
		||||
 | 
			
		||||
#ifdef DIVUNION
 | 
			
		||||
	res = (trglen == ntrue) ? true : ((((((float4) ntrue) / ((float4) (trglen - ntrue)))) >= trgm_limit) ? true : false);
 | 
			
		||||
#else
 | 
			
		||||
	res = (trglen == 0) ? false : ((((((float4) ntrue) / ((float4) trglen))) >= trgm_limit) ? true : false);
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
	PG_RETURN_BOOL(res);
 | 
			
		||||
}
 | 
			
		||||
@ -20,6 +20,14 @@ DROP FUNCTION gtrgm_consistent(gtrgm,internal,int4);
 | 
			
		||||
 | 
			
		||||
DROP TYPE gtrgm CASCADE;
 | 
			
		||||
 | 
			
		||||
DROP OPERATOR CLASS gin_trgm_ops USING gin;
 | 
			
		||||
 | 
			
		||||
DROP FUNCTION gin_extract_trgm(text, internal);
 | 
			
		||||
 | 
			
		||||
DROP FUNCTION gin_extract_trgm(text, internal, internal);
 | 
			
		||||
 | 
			
		||||
DROP FUNCTION gin_trgm_consistent(internal, internal, text);
 | 
			
		||||
 | 
			
		||||
DROP OPERATOR % (text, text);
 | 
			
		||||
 | 
			
		||||
DROP FUNCTION similarity_op(text,text);
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user