mirror of
https://github.com/postgres/postgres.git
synced 2025-05-21 00:02:53 -04:00
comment line where output as too long, and update typedefs for /lib directory. Also fix case where identifiers were used as variable names in the backend, but as typedefs in ecpg (favor the backend for indenting). Backpatch to 8.1.X.
802 lines
15 KiB
C
802 lines
15 KiB
C
/*
|
|
* Relevation
|
|
* Teodor Sigaev <teodor@sigaev.ru>
|
|
*/
|
|
#include "postgres.h"
|
|
#include <math.h>
|
|
|
|
#include "access/gist.h"
|
|
#include "access/itup.h"
|
|
#include "utils/builtins.h"
|
|
#include "fmgr.h"
|
|
#include "funcapi.h"
|
|
#include "storage/bufpage.h"
|
|
#include "executor/spi.h"
|
|
#include "commands/trigger.h"
|
|
#include "nodes/pg_list.h"
|
|
#include "catalog/namespace.h"
|
|
|
|
#include "utils/array.h"
|
|
|
|
#include "tsvector.h"
|
|
#include "query.h"
|
|
#include "common.h"
|
|
|
|
PG_FUNCTION_INFO_V1(rank);
|
|
Datum rank(PG_FUNCTION_ARGS);
|
|
|
|
PG_FUNCTION_INFO_V1(rank_def);
|
|
Datum rank_def(PG_FUNCTION_ARGS);
|
|
|
|
PG_FUNCTION_INFO_V1(rank_cd);
|
|
Datum rank_cd(PG_FUNCTION_ARGS);
|
|
|
|
PG_FUNCTION_INFO_V1(rank_cd_def);
|
|
Datum rank_cd_def(PG_FUNCTION_ARGS);
|
|
|
|
PG_FUNCTION_INFO_V1(get_covers);
|
|
Datum get_covers(PG_FUNCTION_ARGS);
|
|
|
|
static float weights[] = {0.1, 0.2, 0.4, 1.0};
|
|
|
|
#define wpos(wep) ( w[ WEP_GETWEIGHT(wep) ] )
|
|
|
|
#define DEF_NORM_METHOD 0
|
|
|
|
static float calc_rank_or(float *w, tsvector * t, QUERYTYPE * q);
|
|
static float calc_rank_and(float *w, tsvector * t, QUERYTYPE * q);
|
|
|
|
/*
|
|
* Returns a weight of a word collocation
|
|
*/
|
|
static float4
|
|
word_distance(int4 w)
|
|
{
|
|
if (w > 100)
|
|
return 1e-30;
|
|
|
|
return 1.0 / (1.005 + 0.05 * exp(((float4) w) / 1.5 - 2));
|
|
}
|
|
|
|
static int
|
|
cnt_length(tsvector * t)
|
|
{
|
|
WordEntry *ptr = ARRPTR(t),
|
|
*end = (WordEntry *) STRPTR(t);
|
|
int len = 0,
|
|
clen;
|
|
|
|
while (ptr < end)
|
|
{
|
|
if ((clen = POSDATALEN(t, ptr)) == 0)
|
|
len += 1;
|
|
else
|
|
len += clen;
|
|
ptr++;
|
|
}
|
|
|
|
return len;
|
|
}
|
|
|
|
static int4
|
|
WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item)
|
|
{
|
|
if (ptr->len == item->length)
|
|
return strncmp(
|
|
eval + ptr->pos,
|
|
qval + item->distance,
|
|
item->length);
|
|
|
|
return (ptr->len > item->length) ? 1 : -1;
|
|
}
|
|
|
|
static WordEntry *
|
|
find_wordentry(tsvector * t, QUERYTYPE * q, ITEM * item)
|
|
{
|
|
WordEntry *StopLow = ARRPTR(t);
|
|
WordEntry *StopHigh = (WordEntry *) STRPTR(t);
|
|
WordEntry *StopMiddle;
|
|
int difference;
|
|
|
|
/* Loop invariant: StopLow <= item < StopHigh */
|
|
|
|
while (StopLow < StopHigh)
|
|
{
|
|
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
|
|
difference = WordECompareITEM(STRPTR(t), GETOPERAND(q), StopMiddle, item);
|
|
if (difference == 0)
|
|
return StopMiddle;
|
|
else if (difference < 0)
|
|
StopLow = StopMiddle + 1;
|
|
else
|
|
StopHigh = StopMiddle;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
|
|
static char *SortAndUniqOperand = NULL;
|
|
|
|
static int
|
|
compareITEM(const void *a, const void *b)
|
|
{
|
|
if ((*(ITEM **) a)->length == (*(ITEM **) b)->length)
|
|
return strncmp(SortAndUniqOperand + (*(ITEM **) a)->distance,
|
|
SortAndUniqOperand + (*(ITEM **) b)->distance,
|
|
(*(ITEM **) b)->length);
|
|
|
|
return ((*(ITEM **) a)->length > (*(ITEM **) b)->length) ? 1 : -1;
|
|
}
|
|
|
|
static ITEM **
|
|
SortAndUniqItems(char *operand, ITEM * item, int *size)
|
|
{
|
|
ITEM **res,
|
|
**ptr,
|
|
**prevptr;
|
|
|
|
ptr = res = (ITEM **) palloc(sizeof(ITEM *) * *size);
|
|
|
|
while ((*size)--)
|
|
{
|
|
if (item->type == VAL)
|
|
{
|
|
*ptr = item;
|
|
ptr++;
|
|
}
|
|
item++;
|
|
}
|
|
|
|
*size = ptr - res;
|
|
if (*size < 2)
|
|
return res;
|
|
|
|
SortAndUniqOperand = operand;
|
|
qsort(res, *size, sizeof(ITEM **), compareITEM);
|
|
|
|
ptr = res + 1;
|
|
prevptr = res;
|
|
|
|
while (ptr - res < *size)
|
|
{
|
|
if (compareITEM((void *) ptr, (void *) prevptr) != 0)
|
|
{
|
|
prevptr++;
|
|
*prevptr = *ptr;
|
|
}
|
|
ptr++;
|
|
}
|
|
|
|
*size = prevptr + 1 - res;
|
|
return res;
|
|
}
|
|
|
|
static WordEntryPos POSNULL[] = {
|
|
0,
|
|
0
|
|
};
|
|
|
|
static float
|
|
calc_rank_and(float *w, tsvector * t, QUERYTYPE * q)
|
|
{
|
|
uint16 **pos;
|
|
int i,
|
|
k,
|
|
l,
|
|
p;
|
|
WordEntry *entry;
|
|
WordEntryPos *post,
|
|
*ct;
|
|
int4 dimt,
|
|
lenct,
|
|
dist;
|
|
float res = -1.0;
|
|
ITEM **item;
|
|
int size = q->size;
|
|
|
|
item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
|
|
if (size < 2)
|
|
{
|
|
pfree(item);
|
|
return calc_rank_or(w, t, q);
|
|
}
|
|
pos = (uint16 **) palloc(sizeof(uint16 *) * q->size);
|
|
memset(pos, 0, sizeof(uint16 *) * q->size);
|
|
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
|
|
WEP_SETPOS(POSNULL[1], MAXENTRYPOS - 1);
|
|
|
|
for (i = 0; i < size; i++)
|
|
{
|
|
entry = find_wordentry(t, q, item[i]);
|
|
if (!entry)
|
|
continue;
|
|
|
|
if (entry->haspos)
|
|
pos[i] = (uint16 *) _POSDATAPTR(t, entry);
|
|
else
|
|
pos[i] = (uint16 *) POSNULL;
|
|
|
|
|
|
dimt = *(uint16 *) (pos[i]);
|
|
post = (WordEntryPos *) (pos[i] + 1);
|
|
for (k = 0; k < i; k++)
|
|
{
|
|
if (!pos[k])
|
|
continue;
|
|
lenct = *(uint16 *) (pos[k]);
|
|
ct = (WordEntryPos *) (pos[k] + 1);
|
|
for (l = 0; l < dimt; l++)
|
|
{
|
|
for (p = 0; p < lenct; p++)
|
|
{
|
|
dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
|
|
if (dist || (dist == 0 && (pos[i] == (uint16 *) POSNULL || pos[k] == (uint16 *) POSNULL)))
|
|
{
|
|
float curw;
|
|
|
|
if (!dist)
|
|
dist = MAXENTRYPOS;
|
|
curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist));
|
|
res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
pfree(pos);
|
|
pfree(item);
|
|
return res;
|
|
}
|
|
|
|
static float
|
|
calc_rank_or(float *w, tsvector * t, QUERYTYPE * q)
|
|
{
|
|
WordEntry *entry;
|
|
WordEntryPos *post;
|
|
int4 dimt,
|
|
j,
|
|
i;
|
|
float res = 0.0;
|
|
ITEM **item;
|
|
int size = q->size;
|
|
|
|
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
|
|
item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
|
|
|
|
for (i = 0; i < size; i++)
|
|
{
|
|
float resj,
|
|
wjm;
|
|
int4 jm;
|
|
|
|
entry = find_wordentry(t, q, item[i]);
|
|
if (!entry)
|
|
continue;
|
|
|
|
if (entry->haspos)
|
|
{
|
|
dimt = POSDATALEN(t, entry);
|
|
post = POSDATAPTR(t, entry);
|
|
}
|
|
else
|
|
{
|
|
dimt = *(uint16 *) POSNULL;
|
|
post = POSNULL + 1;
|
|
}
|
|
|
|
resj = 0.0;
|
|
wjm = -1.0;
|
|
jm = 0;
|
|
for (j = 0; j < dimt; j++)
|
|
{
|
|
resj = resj + wpos(post[j]) / ((j + 1) * (j + 1));
|
|
if (wpos(post[j]) > wjm)
|
|
{
|
|
wjm = wpos(post[j]);
|
|
jm = j;
|
|
}
|
|
}
|
|
/*
|
|
limit (sum(i/i^2),i->inf) = pi^2/6
|
|
resj = sum(wi/i^2),i=1,noccurence,
|
|
wi - should be sorted desc,
|
|
don't sort for now, just choose maximum weight. This should be corrected
|
|
Oleg Bartunov
|
|
*/
|
|
res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
|
|
}
|
|
if (size > 0)
|
|
res = res / size;
|
|
pfree(item);
|
|
return res;
|
|
}
|
|
|
|
static float
|
|
calc_rank(float *w, tsvector * t, QUERYTYPE * q, int4 method)
|
|
{
|
|
ITEM *item = GETQUERY(q);
|
|
float res = 0.0;
|
|
int len;
|
|
|
|
if (!t->size || !q->size)
|
|
return 0.0;
|
|
|
|
res = (item->type != VAL && item->val == (int4) '&') ?
|
|
calc_rank_and(w, t, q) : calc_rank_or(w, t, q);
|
|
|
|
if (res < 0)
|
|
res = 1e-20;
|
|
|
|
switch (method)
|
|
{
|
|
case 0:
|
|
break;
|
|
case 1:
|
|
res /= log((float) (cnt_length(t) + 1)) / log(2.0);
|
|
break;
|
|
case 2:
|
|
len = cnt_length(t);
|
|
if (len > 0)
|
|
res /= (float) len;
|
|
break;
|
|
default:
|
|
/* internal error */
|
|
elog(ERROR, "unrecognized normalization method: %d", method);
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
Datum
|
|
rank(PG_FUNCTION_ARGS)
|
|
{
|
|
ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
|
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
|
|
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
|
|
int method = DEF_NORM_METHOD;
|
|
float res = 0.0;
|
|
float ws[lengthof(weights)];
|
|
int i;
|
|
|
|
if (ARR_NDIM(win) != 1)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
|
|
errmsg("array of weight must be one-dimensional")));
|
|
|
|
if (ARRNELEMS(win) < lengthof(weights))
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
|
|
errmsg("array of weight is too short")));
|
|
|
|
for (i = 0; i < lengthof(weights); i++)
|
|
{
|
|
ws[i] = (((float4 *) ARR_DATA_PTR(win))[i] >= 0) ? ((float4 *) ARR_DATA_PTR(win))[i] : weights[i];
|
|
if (ws[i] > 1.0)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("weight out of range")));
|
|
}
|
|
|
|
if (PG_NARGS() == 4)
|
|
method = PG_GETARG_INT32(3);
|
|
|
|
res = calc_rank(ws, txt, query, method);
|
|
|
|
PG_FREE_IF_COPY(win, 0);
|
|
PG_FREE_IF_COPY(txt, 1);
|
|
PG_FREE_IF_COPY(query, 2);
|
|
PG_RETURN_FLOAT4(res);
|
|
}
|
|
|
|
Datum
|
|
rank_def(PG_FUNCTION_ARGS)
|
|
{
|
|
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
|
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
|
|
float res = 0.0;
|
|
int method = DEF_NORM_METHOD;
|
|
|
|
if (PG_NARGS() == 3)
|
|
method = PG_GETARG_INT32(2);
|
|
|
|
res = calc_rank(weights, txt, query, method);
|
|
|
|
PG_FREE_IF_COPY(txt, 0);
|
|
PG_FREE_IF_COPY(query, 1);
|
|
PG_RETURN_FLOAT4(res);
|
|
}
|
|
|
|
|
|
typedef struct
|
|
{
|
|
ITEM *item;
|
|
int32 pos;
|
|
} DocRepresentation;
|
|
|
|
static int
|
|
compareDocR(const void *a, const void *b)
|
|
{
|
|
if (((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos)
|
|
return 0;
|
|
return (((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos) ? 1 : -1;
|
|
}
|
|
|
|
|
|
typedef struct
|
|
{
|
|
DocRepresentation *doc;
|
|
int len;
|
|
} ChkDocR;
|
|
|
|
static bool
|
|
checkcondition_DR(void *checkval, ITEM * val)
|
|
{
|
|
DocRepresentation *ptr = ((ChkDocR *) checkval)->doc;
|
|
|
|
while (ptr - ((ChkDocR *) checkval)->doc < ((ChkDocR *) checkval)->len)
|
|
{
|
|
if (val == ptr->item || compareITEM(&val, &(ptr->item)) == 0)
|
|
return true;
|
|
ptr++;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
|
|
static bool
|
|
Cover(DocRepresentation * doc, int len, QUERYTYPE * query, int *pos, int *p, int *q)
|
|
{
|
|
int i;
|
|
DocRepresentation *ptr,
|
|
*f = (DocRepresentation *) 0xffffffff;
|
|
ITEM *item = GETQUERY(query);
|
|
int lastpos = *pos;
|
|
int oldq = *q;
|
|
|
|
*p = 0x7fffffff;
|
|
*q = 0;
|
|
|
|
for (i = 0; i < query->size; i++)
|
|
{
|
|
if (item->type != VAL)
|
|
{
|
|
item++;
|
|
continue;
|
|
}
|
|
ptr = doc + *pos;
|
|
|
|
while (ptr - doc < len)
|
|
{
|
|
if (ptr->item == item)
|
|
{
|
|
if (ptr->pos > *q)
|
|
{
|
|
*q = ptr->pos;
|
|
lastpos = ptr - doc;
|
|
}
|
|
break;
|
|
}
|
|
ptr++;
|
|
}
|
|
|
|
item++;
|
|
}
|
|
|
|
if (*q == 0)
|
|
return false;
|
|
|
|
if (*q == oldq)
|
|
{ /* already check this pos */
|
|
(*pos)++;
|
|
return Cover(doc, len, query, pos, p, q);
|
|
}
|
|
|
|
item = GETQUERY(query);
|
|
for (i = 0; i < query->size; i++)
|
|
{
|
|
if (item->type != VAL)
|
|
{
|
|
item++;
|
|
continue;
|
|
}
|
|
ptr = doc + lastpos;
|
|
|
|
while (ptr >= doc + *pos)
|
|
{
|
|
if (ptr->item == item)
|
|
{
|
|
if (ptr->pos < *p)
|
|
{
|
|
*p = ptr->pos;
|
|
f = ptr;
|
|
}
|
|
break;
|
|
}
|
|
ptr--;
|
|
}
|
|
item++;
|
|
}
|
|
|
|
if (*p <= *q)
|
|
{
|
|
ChkDocR ch;
|
|
|
|
ch.doc = f;
|
|
ch.len = (doc + lastpos) - f + 1;
|
|
*pos = f - doc + 1;
|
|
SortAndUniqOperand = GETOPERAND(query);
|
|
if (TS_execute(GETQUERY(query), &ch, false, checkcondition_DR))
|
|
{
|
|
/*
|
|
* elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p, *q);
|
|
*/
|
|
return true;
|
|
}
|
|
else
|
|
return Cover(doc, len, query, pos, p, q);
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
static DocRepresentation *
|
|
get_docrep(tsvector * txt, QUERYTYPE * query, int *doclen)
|
|
{
|
|
ITEM *item = GETQUERY(query);
|
|
WordEntry *entry;
|
|
WordEntryPos *post;
|
|
int4 dimt,
|
|
j,
|
|
i;
|
|
int len = query->size * 4,
|
|
cur = 0;
|
|
DocRepresentation *doc;
|
|
|
|
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
|
|
doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
|
|
for (i = 0; i < query->size; i++)
|
|
{
|
|
if (item[i].type != VAL)
|
|
continue;
|
|
|
|
entry = find_wordentry(txt, query, &(item[i]));
|
|
if (!entry)
|
|
continue;
|
|
|
|
if (entry->haspos)
|
|
{
|
|
dimt = POSDATALEN(txt, entry);
|
|
post = POSDATAPTR(txt, entry);
|
|
}
|
|
else
|
|
{
|
|
dimt = *(uint16 *) POSNULL;
|
|
post = POSNULL + 1;
|
|
}
|
|
|
|
while (cur + dimt >= len)
|
|
{
|
|
len *= 2;
|
|
doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
|
|
}
|
|
|
|
for (j = 0; j < dimt; j++)
|
|
{
|
|
doc[cur].item = &(item[i]);
|
|
doc[cur].pos = WEP_GETPOS(post[j]);
|
|
cur++;
|
|
}
|
|
}
|
|
|
|
*doclen = cur;
|
|
|
|
if (cur > 0)
|
|
{
|
|
if (cur > 1)
|
|
qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
|
|
return doc;
|
|
}
|
|
|
|
pfree(doc);
|
|
return NULL;
|
|
}
|
|
|
|
|
|
Datum
|
|
rank_cd(PG_FUNCTION_ARGS)
|
|
{
|
|
int K = PG_GETARG_INT32(0);
|
|
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
|
|
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
|
|
int method = DEF_NORM_METHOD;
|
|
DocRepresentation *doc;
|
|
float res = 0.0;
|
|
int p = 0,
|
|
q = 0,
|
|
len,
|
|
cur;
|
|
|
|
doc = get_docrep(txt, query, &len);
|
|
if (!doc)
|
|
{
|
|
PG_FREE_IF_COPY(txt, 1);
|
|
PG_FREE_IF_COPY(query, 2);
|
|
PG_RETURN_FLOAT4(0.0);
|
|
}
|
|
|
|
cur = 0;
|
|
if (K <= 0)
|
|
K = 4;
|
|
while (Cover(doc, len, query, &cur, &p, &q))
|
|
res += (q - p + 1 > K) ? ((float) K) / ((float) (q - p + 1)) : 1.0;
|
|
|
|
if (PG_NARGS() == 4)
|
|
method = PG_GETARG_INT32(3);
|
|
|
|
switch (method)
|
|
{
|
|
case 0:
|
|
break;
|
|
case 1:
|
|
res /= log((float) (cnt_length(txt) + 1));
|
|
break;
|
|
case 2:
|
|
len = cnt_length(txt);
|
|
if (len > 0)
|
|
res /= (float) len;
|
|
break;
|
|
default:
|
|
/* internal error */
|
|
elog(ERROR, "unrecognized normalization method: %d", method);
|
|
}
|
|
|
|
pfree(doc);
|
|
PG_FREE_IF_COPY(txt, 1);
|
|
PG_FREE_IF_COPY(query, 2);
|
|
|
|
PG_RETURN_FLOAT4(res);
|
|
}
|
|
|
|
|
|
Datum
|
|
rank_cd_def(PG_FUNCTION_ARGS)
|
|
{
|
|
PG_RETURN_DATUM(DirectFunctionCall4(
|
|
rank_cd,
|
|
Int32GetDatum(-1),
|
|
PG_GETARG_DATUM(0),
|
|
PG_GETARG_DATUM(1),
|
|
(PG_NARGS() == 3) ? PG_GETARG_DATUM(2) : Int32GetDatum(DEF_NORM_METHOD)
|
|
));
|
|
}
|
|
|
|
/**************debug*************/
|
|
|
|
typedef struct
|
|
{
|
|
char *w;
|
|
int2 len;
|
|
int2 pos;
|
|
int2 start;
|
|
int2 finish;
|
|
} DocWord;
|
|
|
|
static int
|
|
compareDocWord(const void *a, const void *b)
|
|
{
|
|
if (((DocWord *) a)->pos == ((DocWord *) b)->pos)
|
|
return 0;
|
|
return (((DocWord *) a)->pos > ((DocWord *) b)->pos) ? 1 : -1;
|
|
}
|
|
|
|
|
|
Datum
|
|
get_covers(PG_FUNCTION_ARGS)
|
|
{
|
|
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
|
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
|
|
WordEntry *pptr = ARRPTR(txt);
|
|
int i,
|
|
dlen = 0,
|
|
j,
|
|
cur = 0,
|
|
len = 0,
|
|
rlen;
|
|
DocWord *dw,
|
|
*dwptr;
|
|
text *out;
|
|
char *cptr;
|
|
DocRepresentation *doc;
|
|
int pos = 0,
|
|
p,
|
|
q,
|
|
olddwpos = 0;
|
|
int ncover = 1;
|
|
|
|
doc = get_docrep(txt, query, &rlen);
|
|
|
|
if (!doc)
|
|
{
|
|
out = palloc(VARHDRSZ);
|
|
VARATT_SIZEP(out) = VARHDRSZ;
|
|
PG_FREE_IF_COPY(txt, 0);
|
|
PG_FREE_IF_COPY(query, 1);
|
|
PG_RETURN_POINTER(out);
|
|
}
|
|
|
|
for (i = 0; i < txt->size; i++)
|
|
{
|
|
if (!pptr[i].haspos)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
errmsg("no pos info")));
|
|
dlen += POSDATALEN(txt, &(pptr[i]));
|
|
}
|
|
|
|
dwptr = dw = palloc(sizeof(DocWord) * dlen);
|
|
memset(dw, 0, sizeof(DocWord) * dlen);
|
|
|
|
for (i = 0; i < txt->size; i++)
|
|
{
|
|
WordEntryPos *posdata = POSDATAPTR(txt, &(pptr[i]));
|
|
|
|
for (j = 0; j < POSDATALEN(txt, &(pptr[i])); j++)
|
|
{
|
|
dw[cur].w = STRPTR(txt) + pptr[i].pos;
|
|
dw[cur].len = pptr[i].len;
|
|
dw[cur].pos = WEP_GETPOS(posdata[j]);
|
|
cur++;
|
|
}
|
|
len += (pptr[i].len + 1) * (int) POSDATALEN(txt, &(pptr[i]));
|
|
}
|
|
qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
|
|
|
|
while (Cover(doc, rlen, query, &pos, &p, &q))
|
|
{
|
|
dwptr = dw + olddwpos;
|
|
while (dwptr->pos < p && dwptr - dw < dlen)
|
|
dwptr++;
|
|
olddwpos = dwptr - dw;
|
|
dwptr->start = ncover;
|
|
while (dwptr->pos < q + 1 && dwptr - dw < dlen)
|
|
dwptr++;
|
|
(dwptr - 1)->finish = ncover;
|
|
len += 4 /* {}+two spaces */ + 2 * 16 /* numbers */ ;
|
|
ncover++;
|
|
}
|
|
|
|
out = palloc(VARHDRSZ + len);
|
|
cptr = ((char *) out) + VARHDRSZ;
|
|
dwptr = dw;
|
|
|
|
while (dwptr - dw < dlen)
|
|
{
|
|
if (dwptr->start)
|
|
{
|
|
sprintf(cptr, "{%d ", dwptr->start);
|
|
cptr = strchr(cptr, '\0');
|
|
}
|
|
memcpy(cptr, dwptr->w, dwptr->len);
|
|
cptr += dwptr->len;
|
|
*cptr = ' ';
|
|
cptr++;
|
|
if (dwptr->finish)
|
|
{
|
|
sprintf(cptr, "}%d ", dwptr->finish);
|
|
cptr = strchr(cptr, '\0');
|
|
}
|
|
dwptr++;
|
|
}
|
|
|
|
VARATT_SIZEP(out) = cptr - ((char *) out);
|
|
|
|
pfree(dw);
|
|
pfree(doc);
|
|
|
|
PG_FREE_IF_COPY(txt, 0);
|
|
PG_FREE_IF_COPY(query, 1);
|
|
PG_RETURN_POINTER(out);
|
|
}
|