mirror of
https://github.com/postgres/postgres.git
synced 2025-06-01 00:01:20 -04:00
1 Fix affixes with void replacement (AFAIK, it's only russian)
2 Optimize regex execution
This commit is contained in:
parent
153d5d31eb
commit
de55c0cef6
@ -1,4 +1,4 @@
|
|||||||
# $PostgreSQL: pgsql/contrib/tsearch2/ispell/Makefile,v 1.5 2003/11/29 19:51:36 pgsql Exp $
|
# $PostgreSQL: pgsql/contrib/tsearch2/ispell/Makefile,v 1.6 2004/06/23 11:06:11 teodor Exp $
|
||||||
|
|
||||||
subdir = contrib/tsearch2/ispell
|
subdir = contrib/tsearch2/ispell
|
||||||
top_builddir = ../../..
|
top_builddir = ../../..
|
||||||
@ -8,7 +8,7 @@ include $(top_builddir)/src/Makefile.global
|
|||||||
PG_CPPFLAGS = -I$(srcdir)/.. $(CPPFLAGS)
|
PG_CPPFLAGS = -I$(srcdir)/.. $(CPPFLAGS)
|
||||||
override CFLAGS += $(CFLAGS_SL)
|
override CFLAGS += $(CFLAGS_SL)
|
||||||
|
|
||||||
SUBOBJS = spell.o
|
SUBOBJS = spell.o regis.o
|
||||||
|
|
||||||
all: SUBSYS.o
|
all: SUBSYS.o
|
||||||
|
|
||||||
|
151
contrib/tsearch2/ispell/regis.c
Normal file
151
contrib/tsearch2/ispell/regis.c
Normal file
@ -0,0 +1,151 @@
|
|||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
|
||||||
|
#include "regis.h"
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
/*
 * RS_isRegis
 *	  Decide whether an affix mask can be handled by the "regis" matcher.
 *
 * A regis pattern is a sequence of items; each item is a single alphabetic
 * character, a set "[abc]" or a negated set "[^abc]".  The grammar checked
 * here mirrors the one RS_compile() parses, so a mask accepted by this
 * function is not rejected later by RS_compile().  (Checking only the
 * character set, as was done before, let masks such as "a]" or "[ab"
 * through.)
 *
 * Returns 1 if str is a well-formed regis pattern, 0 otherwise.  An empty
 * or NULL string is accepted, as it always was.
 */
int
RS_isRegis(const char *str) {
	enum { IS_WAIT, IS_SET_OPEN, IS_SET_BODY } state = IS_WAIT;
	const unsigned char *p = (const unsigned char *) str;

	if (p == NULL)
		return 1;

	for (; *p; p++) {
		switch (state) {
			case IS_WAIT:
				/* between items: a literal letter or the start of a set */
				if (*p == '[')
					state = IS_SET_OPEN;
				else if (!isalpha(*p))
					return 0;
				break;

			case IS_SET_OPEN:
				/* right after '[': negation mark or the first member */
				if (*p == '^' || isalpha(*p))
					state = IS_SET_BODY;
				else
					return 0;
				break;

			case IS_SET_BODY:
				/* inside a set: more members or the closing bracket */
				if (*p == ']')
					state = IS_WAIT;
				else if (!isalpha(*p))
					return 0;
				break;
		}
	}

	/* a set that was opened must also have been closed */
	return (state == IS_WAIT) ? 1 : 0;
}
|
||||||
|
|
||||||
|
/*
 * Parser states used by RS_compile() while scanning a pattern:
 *	RS_IN_WAIT		- between items, expecting a letter or '['
 *	RS_IN_ONEOF		- just after '[', expecting '^' or the first set member
 *	RS_IN_ONEOF_IN	- inside "[...]" after the first member
 *	RS_IN_NONEOF	- inside "[^...]"
 */
#define RS_IN_ONEOF 1
|
||||||
|
#define RS_IN_ONEOF_IN 2
|
||||||
|
#define RS_IN_NONEOF 3
|
||||||
|
#define RS_IN_WAIT 4
|
||||||
|
|
||||||
|
/*
 * newRegisNode
 *	  Allocate a zero-filled pattern node with room for up to len set
 *	  characters plus a terminating NUL, and link it after prev.
 *
 * prev may be NULL when the first node of a pattern is being created.
 * Allocation failure is reported through ts_error().
 *
 * Returns the new node.
 */
static RegisNode*
newRegisNode(RegisNode *prev, int len) {
	/*
	 * sizeof(RegisNode) covers the header, any alignment padding in front of
	 * "next", and data[0]; adding len therefore leaves room for len
	 * characters and the trailing NUL.  Summing the member sizes by hand
	 * (as RNHDRSZ does) misses that padding and under-allocates on
	 * platforms where pointers are wider than uint32.
	 */
	size_t		size = sizeof(RegisNode) + (size_t) len;
	RegisNode  *node = (RegisNode *) calloc(1, size);

	if (!node)
		ts_error(ERROR, "No memory");

	if (prev)
		prev->next = node;

	return node;
}
|
||||||
|
|
||||||
|
int
|
||||||
|
RS_compile(Regis *r, int issuffix, const char *str) {
|
||||||
|
int i,len = strlen(str);
|
||||||
|
int state = RS_IN_WAIT;
|
||||||
|
RegisNode *ptr=NULL;
|
||||||
|
|
||||||
|
memset(r,0,sizeof(Regis));
|
||||||
|
r->issuffix = (issuffix) ? 1 : 0;
|
||||||
|
|
||||||
|
for(i=0;i<len;i++) {
|
||||||
|
unsigned char c = *( ( (unsigned char*)str ) + i );
|
||||||
|
if ( state == RS_IN_WAIT ) {
|
||||||
|
if ( isalpha(c) ) {
|
||||||
|
if ( ptr )
|
||||||
|
ptr = newRegisNode(ptr,len);
|
||||||
|
else
|
||||||
|
ptr = r->node = newRegisNode(NULL,len);
|
||||||
|
ptr->data[ 0 ] = c;
|
||||||
|
ptr->type = RSF_ONEOF;
|
||||||
|
ptr->len=1;
|
||||||
|
} else if ( c=='[' ) {
|
||||||
|
if ( ptr )
|
||||||
|
ptr = newRegisNode(ptr,len);
|
||||||
|
else
|
||||||
|
ptr = r->node = newRegisNode(NULL,len);
|
||||||
|
ptr->type = RSF_ONEOF;
|
||||||
|
state=RS_IN_ONEOF;
|
||||||
|
} else
|
||||||
|
ts_error(ERROR,"Error in regis: %s at pos %d\n", str, i+1);
|
||||||
|
} else if ( state == RS_IN_ONEOF ) {
|
||||||
|
if ( c=='^' ) {
|
||||||
|
ptr->type = RSF_NONEOF;
|
||||||
|
state=RS_IN_NONEOF;
|
||||||
|
} else if ( isalpha(c) ) {
|
||||||
|
ptr->data[ 0 ] = c;
|
||||||
|
ptr->len=1;
|
||||||
|
state=RS_IN_ONEOF_IN;
|
||||||
|
} else
|
||||||
|
ts_error(ERROR,"Error in regis: %s at pos %d\n", str, i+1);
|
||||||
|
} else if ( state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF ) {
|
||||||
|
if ( isalpha(c) ) {
|
||||||
|
ptr->data[ ptr->len ] = c;
|
||||||
|
ptr->len++;
|
||||||
|
} else if ( c==']' ) {
|
||||||
|
state=RS_IN_WAIT;
|
||||||
|
} else
|
||||||
|
ts_error(ERROR,"Error in regis: %s at pos %d\n", str, i+1);
|
||||||
|
} else
|
||||||
|
ts_error(ERROR,"Internal error in RS_compile: %d\n", state);
|
||||||
|
}
|
||||||
|
|
||||||
|
ptr = r->node;
|
||||||
|
while(ptr) {
|
||||||
|
r->nchar++;
|
||||||
|
ptr=ptr->next;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
RS_free(Regis *r) {
|
||||||
|
RegisNode *ptr=r->node,*tmp;
|
||||||
|
|
||||||
|
while(ptr) {
|
||||||
|
tmp=ptr->next;
|
||||||
|
free(ptr);
|
||||||
|
ptr = tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
r->node = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
RS_execute(Regis *r, const char *str, int len) {
|
||||||
|
RegisNode *ptr=r->node;
|
||||||
|
unsigned char *c;
|
||||||
|
|
||||||
|
if (len<0)
|
||||||
|
len=strlen(str);
|
||||||
|
|
||||||
|
if (len<r->nchar)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if ( r->issuffix )
|
||||||
|
c = ((unsigned char*)str) + len - r->nchar;
|
||||||
|
else
|
||||||
|
c = (unsigned char*)str;
|
||||||
|
|
||||||
|
while(ptr) {
|
||||||
|
switch(ptr->type) {
|
||||||
|
case RSF_ONEOF:
|
||||||
|
if ( ptr->len==0 ) {
|
||||||
|
if ( *c != *(ptr->data) )
|
||||||
|
return 0;
|
||||||
|
} else if ( strchr((char*)ptr->data, *c) == NULL )
|
||||||
|
return 0;
|
||||||
|
break;
|
||||||
|
case RSF_NONEOF:
|
||||||
|
if ( ptr->len==0 ) {
|
||||||
|
if ( *c == *(ptr->data) )
|
||||||
|
return 0;
|
||||||
|
} else if ( strchr((char*)ptr->data, *c) != NULL )
|
||||||
|
return 0;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
ts_error(ERROR,"RS_execute: Unknown type node: %d\n", ptr->type);
|
||||||
|
}
|
||||||
|
ptr=ptr->next;
|
||||||
|
c++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
34
contrib/tsearch2/ispell/regis.h
Normal file
34
contrib/tsearch2/ispell/regis.h
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
#ifndef __REGIS_H__
|
||||||
|
#define __REGIS_H__
|
||||||
|
|
||||||
|
#include "postgres.h"
|
||||||
|
|
||||||
|
typedef struct RegisNode {
|
||||||
|
uint32
|
||||||
|
type:2,
|
||||||
|
len:16,
|
||||||
|
unused:14;
|
||||||
|
struct RegisNode *next;
|
||||||
|
unsigned char data[1];
|
||||||
|
} RegisNode;
|
||||||
|
|
||||||
|
#define RNHDRSZ (sizeof(uint32)+sizeof(void*))
|
||||||
|
|
||||||
|
#define RSF_ONEOF 1
|
||||||
|
#define RSF_NONEOF 2
|
||||||
|
|
||||||
|
typedef struct Regis {
|
||||||
|
RegisNode *node;
|
||||||
|
uint32
|
||||||
|
issuffix:1,
|
||||||
|
nchar:16,
|
||||||
|
unused:15;
|
||||||
|
} Regis;
|
||||||
|
|
||||||
|
int RS_isRegis(const char *str);
|
||||||
|
|
||||||
|
int RS_compile(Regis *r, int issuffix, const char *str);
|
||||||
|
void RS_free(Regis *r);
|
||||||
|
/* returns 1 if matches */
|
||||||
|
int RS_execute(Regis *r, const char *str, int len);
|
||||||
|
#endif
|
@ -190,24 +190,24 @@ FindWord(IspellDict * Conf, const char *word, int affixflag, char compoundonly)
|
|||||||
{
|
{
|
||||||
SPNode *node = Conf->Dictionary;
|
SPNode *node = Conf->Dictionary;
|
||||||
SPNodeData *StopLow, *StopHigh, *StopMiddle;
|
SPNodeData *StopLow, *StopHigh, *StopMiddle;
|
||||||
int level=0, wrdlen=strlen(word);
|
uint8 *ptr =(uint8*)word;
|
||||||
|
|
||||||
while( node && level<wrdlen) {
|
while( node && *ptr) {
|
||||||
StopLow = node->data;
|
StopLow = node->data;
|
||||||
StopHigh = node->data+node->length;
|
StopHigh = node->data+node->length;
|
||||||
while (StopLow < StopHigh) {
|
while (StopLow < StopHigh) {
|
||||||
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
|
StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
|
||||||
if ( StopMiddle->val == ((uint8*)(word))[level] ) {
|
if ( StopMiddle->val == *ptr ) {
|
||||||
if ( wrdlen==level+1 && StopMiddle->isword ) {
|
if ( *(ptr+1)=='\0' && StopMiddle->isword ) {
|
||||||
if ( compoundonly && !StopMiddle->compoundallow )
|
if ( compoundonly && !StopMiddle->compoundallow )
|
||||||
return 0;
|
return 0;
|
||||||
if ( (affixflag == 0) || (strchr(Conf->AffixData[StopMiddle->affix], affixflag) != NULL))
|
if ( (affixflag == 0) || (strchr(Conf->AffixData[StopMiddle->affix], affixflag) != NULL))
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
node=StopMiddle->node;
|
node=StopMiddle->node;
|
||||||
level++;
|
ptr++;
|
||||||
break;
|
break;
|
||||||
} else if ( StopMiddle->val < ((uint8*)(word))[level] ) {
|
} else if ( StopMiddle->val < *ptr ) {
|
||||||
StopLow = StopMiddle + 1;
|
StopLow = StopMiddle + 1;
|
||||||
} else {
|
} else {
|
||||||
StopHigh = StopMiddle;
|
StopHigh = StopMiddle;
|
||||||
@ -236,19 +236,32 @@ NIAddAffix(IspellDict * Conf, int flag, char flagflags, const char *mask, const
|
|||||||
}
|
}
|
||||||
MEMOUT(Conf->Affix);
|
MEMOUT(Conf->Affix);
|
||||||
}
|
}
|
||||||
if (type == 's')
|
|
||||||
sprintf(Conf->Affix[Conf->naffixes].mask, "%s$", mask);
|
|
||||||
else
|
|
||||||
sprintf(Conf->Affix[Conf->naffixes].mask, "^%s", mask);
|
|
||||||
Conf->Affix[Conf->naffixes].compile = 1;
|
|
||||||
Conf->Affix[Conf->naffixes].flagflags = flagflags;
|
|
||||||
Conf->Affix[Conf->naffixes].flag = flag;
|
|
||||||
Conf->Affix[Conf->naffixes].type = type;
|
|
||||||
|
|
||||||
strcpy(Conf->Affix[Conf->naffixes].find, find);
|
if ( strcmp(mask,".")==0 ) {
|
||||||
strcpy(Conf->Affix[Conf->naffixes].repl, repl);
|
Conf->Affix[Conf->naffixes].issimple=1;
|
||||||
Conf->Affix[Conf->naffixes].replen = strlen(repl);
|
Conf->Affix[Conf->naffixes].isregis=0;
|
||||||
Conf->naffixes++;
|
*( Conf->Affix[Conf->naffixes].mask )='\0';
|
||||||
|
} else if ( RS_isRegis(mask) ) {
|
||||||
|
Conf->Affix[Conf->naffixes].issimple=0;
|
||||||
|
Conf->Affix[Conf->naffixes].isregis=1;
|
||||||
|
strcpy(Conf->Affix[Conf->naffixes].mask, mask);
|
||||||
|
} else {
|
||||||
|
Conf->Affix[Conf->naffixes].issimple=0;
|
||||||
|
Conf->Affix[Conf->naffixes].isregis=0;
|
||||||
|
if (type == FF_SUFFIX)
|
||||||
|
sprintf(Conf->Affix[Conf->naffixes].mask, "%s$", mask);
|
||||||
|
else
|
||||||
|
sprintf(Conf->Affix[Conf->naffixes].mask, "^%s", mask);
|
||||||
|
}
|
||||||
|
Conf->Affix[Conf->naffixes].compile = 1;
|
||||||
|
Conf->Affix[Conf->naffixes].flagflags = flagflags;
|
||||||
|
Conf->Affix[Conf->naffixes].flag = flag;
|
||||||
|
Conf->Affix[Conf->naffixes].type = type;
|
||||||
|
|
||||||
|
strcpy(Conf->Affix[Conf->naffixes].find, find);
|
||||||
|
strcpy(Conf->Affix[Conf->naffixes].repl, repl);
|
||||||
|
Conf->Affix[Conf->naffixes].replen = strlen(repl);
|
||||||
|
Conf->naffixes++;
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -366,7 +379,7 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
NIAddAffix(Conf, (int) flag, (char) flagflags, mask, find, repl, suffixes ? 's' : 'p');
|
NIAddAffix(Conf, (int) flag, (char) flagflags, mask, find, repl, suffixes ? FF_SUFFIX : FF_PREFIX);
|
||||||
|
|
||||||
}
|
}
|
||||||
fclose(affix);
|
fclose(affix);
|
||||||
@ -550,6 +563,46 @@ mkANode(IspellDict *Conf, int low, int high, int level, int type) {
|
|||||||
return rs;
|
return rs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
mkVoidAffix(IspellDict * Conf, int issuffix, int startsuffix) {
|
||||||
|
int i,cnt=0;
|
||||||
|
int start = (issuffix) ? startsuffix : 0;
|
||||||
|
int end = (issuffix) ? Conf->naffixes : startsuffix;
|
||||||
|
AffixNode *Affix = (AffixNode*)malloc( ANHRDSZ + sizeof(AffixNodeData));
|
||||||
|
|
||||||
|
MEMOUT(Affix);
|
||||||
|
memset(Affix, 0, ANHRDSZ + sizeof(AffixNodeData) );
|
||||||
|
Affix->length=1;
|
||||||
|
Affix->isvoid=1;
|
||||||
|
|
||||||
|
if (issuffix) {
|
||||||
|
Affix->data->node=Conf->Suffix;
|
||||||
|
Conf->Suffix = Affix;
|
||||||
|
} else {
|
||||||
|
Affix->data->node=Conf->Prefix;
|
||||||
|
Conf->Prefix = Affix;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
for(i=start;i<end;i++)
|
||||||
|
if (Conf->Affix[i].replen==0)
|
||||||
|
cnt++;
|
||||||
|
|
||||||
|
if ( cnt==0 )
|
||||||
|
return;
|
||||||
|
|
||||||
|
Affix->data->aff = (AFFIX**)malloc( sizeof(AFFIX*) * cnt );
|
||||||
|
MEMOUT(Affix->data->aff);
|
||||||
|
Affix->data->naff = (uint32)cnt;
|
||||||
|
|
||||||
|
cnt=0;
|
||||||
|
for(i=start;i<end;i++)
|
||||||
|
if (Conf->Affix[i].replen==0) {
|
||||||
|
Affix->data->aff[cnt] = Conf->Affix + i;
|
||||||
|
cnt++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
NISortAffixes(IspellDict * Conf)
|
NISortAffixes(IspellDict * Conf)
|
||||||
{
|
{
|
||||||
@ -584,6 +637,8 @@ NISortAffixes(IspellDict * Conf)
|
|||||||
|
|
||||||
Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, 'p');
|
Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, 'p');
|
||||||
Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, 's');
|
Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, 's');
|
||||||
|
mkVoidAffix(Conf, 1, firstsuffix);
|
||||||
|
mkVoidAffix(Conf, 0, firstsuffix);
|
||||||
}
|
}
|
||||||
|
|
||||||
static AffixNodeData*
|
static AffixNodeData*
|
||||||
@ -591,17 +646,23 @@ FinfAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type)
|
|||||||
AffixNodeData *StopLow, *StopHigh, *StopMiddle;
|
AffixNodeData *StopLow, *StopHigh, *StopMiddle;
|
||||||
uint8 symbol;
|
uint8 symbol;
|
||||||
|
|
||||||
|
if ( node->isvoid ) { /* search void affixes */
|
||||||
|
if (node->data->naff)
|
||||||
|
return node->data;
|
||||||
|
node = node->data->node;
|
||||||
|
}
|
||||||
|
|
||||||
while( node && *level<wrdlen) {
|
while( node && *level<wrdlen) {
|
||||||
StopLow = node->data;
|
StopLow = node->data;
|
||||||
StopHigh = node->data+node->length;
|
StopHigh = node->data+node->length;
|
||||||
while (StopLow < StopHigh) {
|
while (StopLow < StopHigh) {
|
||||||
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
|
StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
|
||||||
symbol = GETWCHAR(word,wrdlen,*level,type);
|
symbol = GETWCHAR(word,wrdlen,*level,type);
|
||||||
if ( StopMiddle->val == symbol ) {
|
if ( StopMiddle->val == symbol ) {
|
||||||
|
(*level)++;
|
||||||
if ( StopMiddle->naff )
|
if ( StopMiddle->naff )
|
||||||
return StopMiddle;
|
return StopMiddle;
|
||||||
node=StopMiddle->node;
|
node=StopMiddle->node;
|
||||||
(*level)++;
|
|
||||||
break;
|
break;
|
||||||
} else if ( StopMiddle->val < symbol ) {
|
} else if ( StopMiddle->val < symbol ) {
|
||||||
StopLow = StopMiddle + 1;
|
StopLow = StopMiddle + 1;
|
||||||
@ -617,11 +678,6 @@ FinfAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type)
|
|||||||
|
|
||||||
static char *
|
static char *
|
||||||
CheckAffix(const char *word, size_t len, AFFIX * Affix, char flagflags, char *newword) {
|
CheckAffix(const char *word, size_t len, AFFIX * Affix, char flagflags, char *newword) {
|
||||||
regmatch_t subs[2]; /* workaround for apache&linux */
|
|
||||||
int err;
|
|
||||||
pg_wchar *data;
|
|
||||||
size_t data_len;
|
|
||||||
int dat_len;
|
|
||||||
|
|
||||||
if ( flagflags & FF_COMPOUNDONLYAFX ) {
|
if ( flagflags & FF_COMPOUNDONLYAFX ) {
|
||||||
if ( (Affix->flagflags & FF_COMPOUNDONLYAFX) == 0 )
|
if ( (Affix->flagflags & FF_COMPOUNDONLYAFX) == 0 )
|
||||||
@ -631,7 +687,7 @@ CheckAffix(const char *word, size_t len, AFFIX * Affix, char flagflags, char *ne
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( Affix->type=='s' ) {
|
if ( Affix->type==FF_SUFFIX ) {
|
||||||
strcpy(newword, word);
|
strcpy(newword, word);
|
||||||
strcpy(newword + len - Affix->replen, Affix->find);
|
strcpy(newword + len - Affix->replen, Affix->find);
|
||||||
} else {
|
} else {
|
||||||
@ -639,34 +695,50 @@ CheckAffix(const char *word, size_t len, AFFIX * Affix, char flagflags, char *ne
|
|||||||
strcat(newword, word + Affix->replen);
|
strcat(newword, word + Affix->replen);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Affix->compile)
|
if ( Affix->issimple ) {
|
||||||
{
|
return newword;
|
||||||
int wmasklen,masklen = strlen(Affix->mask);
|
} else if ( Affix->isregis ) {
|
||||||
pg_wchar *mask;
|
if (Affix->compile) {
|
||||||
mask = (pg_wchar *) palloc((masklen + 1) * sizeof(pg_wchar));
|
RS_compile(&(Affix->reg.regis), (Affix->type==FF_SUFFIX) ? 1 : 0, Affix->mask);
|
||||||
wmasklen = pg_mb2wchar_with_len( Affix->mask, mask, masklen);
|
Affix->compile = 0;
|
||||||
|
}
|
||||||
err = pg_regcomp(&(Affix->reg), mask, wmasklen, REG_EXTENDED | REG_ICASE | REG_NOSUB);
|
if ( RS_execute(&(Affix->reg.regis), newword, -1) )
|
||||||
pfree(mask);
|
return newword;
|
||||||
if (err)
|
} else {
|
||||||
|
regmatch_t subs[2]; /* workaround for apache&linux */
|
||||||
|
int err;
|
||||||
|
pg_wchar *data;
|
||||||
|
size_t data_len;
|
||||||
|
int dat_len;
|
||||||
|
if (Affix->compile)
|
||||||
{
|
{
|
||||||
/* regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE); */
|
int wmasklen,masklen = strlen(Affix->mask);
|
||||||
pg_regfree(&(Affix->reg));
|
pg_wchar *mask;
|
||||||
return (NULL);
|
mask = (pg_wchar *) palloc((masklen + 1) * sizeof(pg_wchar));
|
||||||
|
wmasklen = pg_mb2wchar_with_len( Affix->mask, mask, masklen);
|
||||||
|
|
||||||
|
err = pg_regcomp(&(Affix->reg.regex), mask, wmasklen, REG_EXTENDED | REG_ICASE | REG_NOSUB);
|
||||||
|
pfree(mask);
|
||||||
|
if (err)
|
||||||
|
{
|
||||||
|
/* regerror(err, &(Affix->reg.regex), regerrstr, ERRSTRSIZE); */
|
||||||
|
pg_regfree(&(Affix->reg.regex));
|
||||||
|
return (NULL);
|
||||||
|
}
|
||||||
|
Affix->compile = 0;
|
||||||
}
|
}
|
||||||
Affix->compile = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Convert data string to wide characters */
|
/* Convert data string to wide characters */
|
||||||
dat_len = strlen(newword);
|
dat_len = strlen(newword);
|
||||||
data = (pg_wchar *) palloc((dat_len + 1) * sizeof(pg_wchar));
|
data = (pg_wchar *) palloc((dat_len + 1) * sizeof(pg_wchar));
|
||||||
data_len = pg_mb2wchar_with_len(newword, data, dat_len);
|
data_len = pg_mb2wchar_with_len(newword, data, dat_len);
|
||||||
|
|
||||||
if (!(err = pg_regexec(&(Affix->reg), data,dat_len,NULL, 1, subs, 0))) {
|
if (!(err = pg_regexec(&(Affix->reg.regex), data,dat_len,NULL, 1, subs, 0))) {
|
||||||
pfree(data);
|
pfree(data);
|
||||||
return newword;
|
return newword;
|
||||||
|
}
|
||||||
|
pfree(data);
|
||||||
}
|
}
|
||||||
pfree(data);
|
|
||||||
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
@ -715,7 +787,6 @@ NormalizeSubWord(IspellDict * Conf, char *word, char flag) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
pnode = prefix->node;
|
pnode = prefix->node;
|
||||||
plevel++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Find all other NORMAL forms of the 'word' (check suffix and then prefix)*/
|
/* Find all other NORMAL forms of the 'word' (check suffix and then prefix)*/
|
||||||
@ -754,13 +825,11 @@ NormalizeSubWord(IspellDict * Conf, char *word, char flag) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
pnode = prefix->node;
|
pnode = prefix->node;
|
||||||
plevel++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
snode=suffix->node;
|
snode=suffix->node;
|
||||||
slevel++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cur == forms) {
|
if (cur == forms) {
|
||||||
@ -1013,8 +1082,12 @@ NIFree(IspellDict * Conf)
|
|||||||
|
|
||||||
for (i = 0; i < Conf->naffixes; i++)
|
for (i = 0; i < Conf->naffixes; i++)
|
||||||
{
|
{
|
||||||
if (Affix[i].compile == 0)
|
if (Affix[i].compile == 0) {
|
||||||
pg_regfree(&(Affix[i].reg));
|
if ( Affix[i].isregis )
|
||||||
|
RS_free(&(Affix[i].reg.regis));
|
||||||
|
else
|
||||||
|
pg_regfree(&(Affix[i].reg.regex));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (Conf->Spell) {
|
if (Conf->Spell) {
|
||||||
for (i = 0; i < Conf->nspell; i++)
|
for (i = 0; i < Conf->nspell; i++)
|
||||||
|
@ -3,6 +3,7 @@
|
|||||||
|
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include "regex/regex.h"
|
#include "regex/regex.h"
|
||||||
|
#include "regis.h"
|
||||||
#include "c.h"
|
#include "c.h"
|
||||||
|
|
||||||
|
|
||||||
@ -40,20 +41,29 @@ typedef struct spell_struct
|
|||||||
|
|
||||||
typedef struct aff_struct
|
typedef struct aff_struct
|
||||||
{
|
{
|
||||||
char flag;
|
uint32
|
||||||
char flagflags;
|
flag:8,
|
||||||
char type;
|
type:2,
|
||||||
char mask[33];
|
compile:1,
|
||||||
char find[16];
|
flagflags:3,
|
||||||
char repl[16];
|
issimple:1,
|
||||||
regex_t reg;
|
isregis:1,
|
||||||
size_t replen;
|
unused:1,
|
||||||
char compile;
|
replen:16;
|
||||||
|
char mask[32];
|
||||||
|
char find[16];
|
||||||
|
char repl[16];
|
||||||
|
union {
|
||||||
|
regex_t regex;
|
||||||
|
Regis regis;
|
||||||
|
} reg;
|
||||||
} AFFIX;
|
} AFFIX;
|
||||||
|
|
||||||
#define FF_CROSSPRODUCT 0x01
|
#define FF_CROSSPRODUCT 0x01
|
||||||
#define FF_COMPOUNDWORD 0x02
|
#define FF_COMPOUNDWORD 0x02
|
||||||
#define FF_COMPOUNDONLYAFX 0x04
|
#define FF_COMPOUNDONLYAFX 0x04
|
||||||
|
#define FF_SUFFIX 2
|
||||||
|
#define FF_PREFIX 1
|
||||||
|
|
||||||
struct AffixNode;
|
struct AffixNode;
|
||||||
|
|
||||||
@ -66,18 +76,13 @@ typedef struct {
|
|||||||
} AffixNodeData;
|
} AffixNodeData;
|
||||||
|
|
||||||
typedef struct AffixNode {
|
typedef struct AffixNode {
|
||||||
uint32 length;
|
uint32 isvoid:1,
|
||||||
|
length:31;
|
||||||
AffixNodeData data[1];
|
AffixNodeData data[1];
|
||||||
} AffixNode;
|
} AffixNode;
|
||||||
|
|
||||||
#define ANHRDSZ (sizeof(uint32))
|
#define ANHRDSZ (sizeof(uint32))
|
||||||
|
|
||||||
typedef struct Tree_struct
|
|
||||||
{
|
|
||||||
int Left[256],
|
|
||||||
Right[256];
|
|
||||||
} Tree_struct;
|
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
char *affix;
|
char *affix;
|
||||||
int len;
|
int len;
|
||||||
|
@ -816,7 +816,7 @@ CREATE OPERATOR CLASS tsvector_ops
|
|||||||
FUNCTION 1 tsvector_cmp(tsvector, tsvector);
|
FUNCTION 1 tsvector_cmp(tsvector, tsvector);
|
||||||
|
|
||||||
--example of ISpell dictionary
|
--example of ISpell dictionary
|
||||||
--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_id=4;
|
--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
|
||||||
--example of synonym dict
|
--example of synonym dict
|
||||||
--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_id=5;
|
--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_id=5;
|
||||||
END;
|
END;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user