mirror of
https://github.com/postgres/postgres.git
synced 2025-10-11 00:03:02 -04:00
Fix a couple more places where an explicit Datum conversion is needed (not clear how we missed these in ff89e182d and previous commits). Replace the minority usage "(Datum) NULL" with "(Datum) 0". The former depends on the assumption that Datum is the same width as Pointer, the latter doesn't. Anyway consistency is a good thing. This is, I believe, the last of the notational mop-up needed before we can consider changing Datum to uint64 everywhere. It's also important cleanup for more aggressive ideas such as making Datum a struct. Discussion: https://postgr.es/m/1749799.1752797397@sss.pgh.pa.us Discussion: https://postgr.es/m/8246d7ff-f4b7-4363-913e-827dadfeb145@eisentraut.org
559 lines
12 KiB
C
559 lines
12 KiB
C
/*
|
|
* contrib/ltree/_ltree_gist.c
|
|
*
|
|
*
|
|
* GiST support for ltree[]
|
|
* Teodor Sigaev <teodor@stack.net>
|
|
*/
|
|
#include "postgres.h"
|
|
|
|
#include <math.h>
|
|
|
|
#include "access/gist.h"
|
|
#include "access/reloptions.h"
|
|
#include "access/stratnum.h"
|
|
#include "crc32.h"
|
|
#include "ltree.h"
|
|
#include "port/pg_bitutils.h"
|
|
#include "utils/array.h"
|
|
|
|
PG_FUNCTION_INFO_V1(_ltree_compress);
|
|
PG_FUNCTION_INFO_V1(_ltree_same);
|
|
PG_FUNCTION_INFO_V1(_ltree_union);
|
|
PG_FUNCTION_INFO_V1(_ltree_penalty);
|
|
PG_FUNCTION_INFO_V1(_ltree_picksplit);
|
|
PG_FUNCTION_INFO_V1(_ltree_consistent);
|
|
PG_FUNCTION_INFO_V1(_ltree_gist_options);
|
|
|
|
#define GETENTRY(vec,pos) ((ltree_gist *) DatumGetPointer((vec)->vector[(pos)].key))
|
|
#define NEXTVAL(x) ( (ltree*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
|
|
|
|
#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
|
|
|
|
|
|
static void
|
|
hashing(BITVECP sign, ltree *t, int siglen)
|
|
{
|
|
int tlen = t->numlevel;
|
|
ltree_level *cur = LTREE_FIRST(t);
|
|
int hash;
|
|
|
|
while (tlen > 0)
|
|
{
|
|
hash = ltree_crc32_sz(cur->name, cur->len);
|
|
AHASH(sign, hash, siglen);
|
|
cur = LEVEL_NEXT(cur);
|
|
tlen--;
|
|
}
|
|
}
|
|
|
|
Datum
|
|
_ltree_compress(PG_FUNCTION_ARGS)
|
|
{
|
|
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
|
|
GISTENTRY *retval = entry;
|
|
int siglen = LTREE_GET_ASIGLEN();
|
|
|
|
if (entry->leafkey)
|
|
{ /* ltree */
|
|
ltree_gist *key;
|
|
ArrayType *val = DatumGetArrayTypeP(entry->key);
|
|
int num = ArrayGetNItems(ARR_NDIM(val), ARR_DIMS(val));
|
|
ltree *item = (ltree *) ARR_DATA_PTR(val);
|
|
|
|
if (ARR_NDIM(val) > 1)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
|
|
errmsg("array must be one-dimensional")));
|
|
if (array_contains_nulls(val))
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
|
|
errmsg("array must not contain nulls")));
|
|
|
|
key = ltree_gist_alloc(false, NULL, siglen, NULL, NULL);
|
|
|
|
while (num > 0)
|
|
{
|
|
hashing(LTG_SIGN(key), item, siglen);
|
|
num--;
|
|
item = NEXTVAL(item);
|
|
}
|
|
|
|
retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
|
|
gistentryinit(*retval, PointerGetDatum(key),
|
|
entry->rel, entry->page,
|
|
entry->offset, false);
|
|
}
|
|
else if (!LTG_ISALLTRUE(DatumGetPointer(entry->key)))
|
|
{
|
|
int32 i;
|
|
ltree_gist *key;
|
|
BITVECP sign = LTG_SIGN(DatumGetPointer(entry->key));
|
|
|
|
ALOOPBYTE(siglen)
|
|
{
|
|
if ((sign[i] & 0xff) != 0xff)
|
|
PG_RETURN_POINTER(retval);
|
|
}
|
|
|
|
key = ltree_gist_alloc(true, sign, siglen, NULL, NULL);
|
|
retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
|
|
gistentryinit(*retval, PointerGetDatum(key),
|
|
entry->rel, entry->page,
|
|
entry->offset, false);
|
|
}
|
|
PG_RETURN_POINTER(retval);
|
|
}
|
|
|
|
Datum
|
|
_ltree_same(PG_FUNCTION_ARGS)
|
|
{
|
|
ltree_gist *a = (ltree_gist *) PG_GETARG_POINTER(0);
|
|
ltree_gist *b = (ltree_gist *) PG_GETARG_POINTER(1);
|
|
bool *result = (bool *) PG_GETARG_POINTER(2);
|
|
int siglen = LTREE_GET_ASIGLEN();
|
|
|
|
if (LTG_ISALLTRUE(a) && LTG_ISALLTRUE(b))
|
|
*result = true;
|
|
else if (LTG_ISALLTRUE(a))
|
|
*result = false;
|
|
else if (LTG_ISALLTRUE(b))
|
|
*result = false;
|
|
else
|
|
{
|
|
int32 i;
|
|
BITVECP sa = LTG_SIGN(a),
|
|
sb = LTG_SIGN(b);
|
|
|
|
*result = true;
|
|
ALOOPBYTE(siglen)
|
|
{
|
|
if (sa[i] != sb[i])
|
|
{
|
|
*result = false;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
PG_RETURN_POINTER(result);
|
|
}
|
|
|
|
static int32
|
|
unionkey(BITVECP sbase, ltree_gist *add, int siglen)
|
|
{
|
|
int32 i;
|
|
BITVECP sadd = LTG_SIGN(add);
|
|
|
|
if (LTG_ISALLTRUE(add))
|
|
return 1;
|
|
|
|
ALOOPBYTE(siglen)
|
|
sbase[i] |= sadd[i];
|
|
return 0;
|
|
}
|
|
|
|
Datum
|
|
_ltree_union(PG_FUNCTION_ARGS)
|
|
{
|
|
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
|
|
int *size = (int *) PG_GETARG_POINTER(1);
|
|
int siglen = LTREE_GET_ASIGLEN();
|
|
int32 i;
|
|
ltree_gist *result = ltree_gist_alloc(false, NULL, siglen, NULL, NULL);
|
|
BITVECP base = LTG_SIGN(result);
|
|
|
|
for (i = 0; i < entryvec->n; i++)
|
|
{
|
|
if (unionkey(base, GETENTRY(entryvec, i), siglen))
|
|
{
|
|
result->flag |= LTG_ALLTRUE;
|
|
SET_VARSIZE(result, LTG_HDRSIZE);
|
|
break;
|
|
}
|
|
}
|
|
|
|
*size = VARSIZE(result);
|
|
|
|
PG_RETURN_POINTER(result);
|
|
}
|
|
|
|
static int32
|
|
sizebitvec(BITVECP sign, int siglen)
|
|
{
|
|
return pg_popcount((const char *) sign, siglen);
|
|
}
|
|
|
|
static int
|
|
hemdistsign(BITVECP a, BITVECP b, int siglen)
|
|
{
|
|
int i,
|
|
diff,
|
|
dist = 0;
|
|
|
|
ALOOPBYTE(siglen)
|
|
{
|
|
diff = (unsigned char) (a[i] ^ b[i]);
|
|
/* Using the popcount functions here isn't likely to win */
|
|
dist += pg_number_of_ones[diff];
|
|
}
|
|
return dist;
|
|
}
|
|
|
|
static int
|
|
hemdist(ltree_gist *a, ltree_gist *b, int siglen)
|
|
{
|
|
if (LTG_ISALLTRUE(a))
|
|
{
|
|
if (LTG_ISALLTRUE(b))
|
|
return 0;
|
|
else
|
|
return ASIGLENBIT(siglen) - sizebitvec(LTG_SIGN(b), siglen);
|
|
}
|
|
else if (LTG_ISALLTRUE(b))
|
|
return ASIGLENBIT(siglen) - sizebitvec(LTG_SIGN(a), siglen);
|
|
|
|
return hemdistsign(LTG_SIGN(a), LTG_SIGN(b), siglen);
|
|
}
|
|
|
|
|
|
Datum
|
|
_ltree_penalty(PG_FUNCTION_ARGS)
|
|
{
|
|
ltree_gist *origval = (ltree_gist *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
|
|
ltree_gist *newval = (ltree_gist *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(1))->key);
|
|
float *penalty = (float *) PG_GETARG_POINTER(2);
|
|
int siglen = LTREE_GET_ASIGLEN();
|
|
|
|
*penalty = hemdist(origval, newval, siglen);
|
|
PG_RETURN_POINTER(penalty);
|
|
}
|
|
|
|
typedef struct
|
|
{
|
|
OffsetNumber pos;
|
|
int32 cost;
|
|
} SPLITCOST;
|
|
|
|
static int
|
|
comparecost(const void *a, const void *b)
|
|
{
|
|
return ((const SPLITCOST *) a)->cost - ((const SPLITCOST *) b)->cost;
|
|
}
|
|
|
|
Datum
|
|
_ltree_picksplit(PG_FUNCTION_ARGS)
|
|
{
|
|
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
|
|
GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
|
|
int siglen = LTREE_GET_ASIGLEN();
|
|
OffsetNumber k,
|
|
j;
|
|
ltree_gist *datum_l,
|
|
*datum_r;
|
|
BITVECP union_l,
|
|
union_r;
|
|
int32 size_alpha,
|
|
size_beta;
|
|
int32 size_waste,
|
|
waste = -1;
|
|
int32 nbytes;
|
|
OffsetNumber seed_1 = 0,
|
|
seed_2 = 0;
|
|
OffsetNumber *left,
|
|
*right;
|
|
OffsetNumber maxoff;
|
|
BITVECP ptr;
|
|
int i;
|
|
SPLITCOST *costvector;
|
|
ltree_gist *_k,
|
|
*_j;
|
|
|
|
maxoff = entryvec->n - 2;
|
|
nbytes = (maxoff + 2) * sizeof(OffsetNumber);
|
|
v->spl_left = (OffsetNumber *) palloc(nbytes);
|
|
v->spl_right = (OffsetNumber *) palloc(nbytes);
|
|
|
|
for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
|
|
{
|
|
_k = GETENTRY(entryvec, k);
|
|
for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
|
|
{
|
|
size_waste = hemdist(_k, GETENTRY(entryvec, j), siglen);
|
|
if (size_waste > waste)
|
|
{
|
|
waste = size_waste;
|
|
seed_1 = k;
|
|
seed_2 = j;
|
|
}
|
|
}
|
|
}
|
|
|
|
left = v->spl_left;
|
|
v->spl_nleft = 0;
|
|
right = v->spl_right;
|
|
v->spl_nright = 0;
|
|
|
|
if (seed_1 == 0 || seed_2 == 0)
|
|
{
|
|
seed_1 = 1;
|
|
seed_2 = 2;
|
|
}
|
|
|
|
/* form initial .. */
|
|
datum_l = ltree_gist_alloc(LTG_ISALLTRUE(GETENTRY(entryvec, seed_1)),
|
|
LTG_SIGN(GETENTRY(entryvec, seed_1)),
|
|
siglen, NULL, NULL);
|
|
|
|
datum_r = ltree_gist_alloc(LTG_ISALLTRUE(GETENTRY(entryvec, seed_2)),
|
|
LTG_SIGN(GETENTRY(entryvec, seed_2)),
|
|
siglen, NULL, NULL);
|
|
|
|
maxoff = OffsetNumberNext(maxoff);
|
|
/* sort before ... */
|
|
costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
|
|
for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
|
|
{
|
|
costvector[j - 1].pos = j;
|
|
_j = GETENTRY(entryvec, j);
|
|
size_alpha = hemdist(datum_l, _j, siglen);
|
|
size_beta = hemdist(datum_r, _j, siglen);
|
|
costvector[j - 1].cost = abs(size_alpha - size_beta);
|
|
}
|
|
qsort(costvector, maxoff, sizeof(SPLITCOST), comparecost);
|
|
|
|
union_l = LTG_SIGN(datum_l);
|
|
union_r = LTG_SIGN(datum_r);
|
|
|
|
for (k = 0; k < maxoff; k++)
|
|
{
|
|
j = costvector[k].pos;
|
|
if (j == seed_1)
|
|
{
|
|
*left++ = j;
|
|
v->spl_nleft++;
|
|
continue;
|
|
}
|
|
else if (j == seed_2)
|
|
{
|
|
*right++ = j;
|
|
v->spl_nright++;
|
|
continue;
|
|
}
|
|
_j = GETENTRY(entryvec, j);
|
|
size_alpha = hemdist(datum_l, _j, siglen);
|
|
size_beta = hemdist(datum_r, _j, siglen);
|
|
|
|
if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.00001))
|
|
{
|
|
if (LTG_ISALLTRUE(datum_l) || LTG_ISALLTRUE(_j))
|
|
{
|
|
if (!LTG_ISALLTRUE(datum_l))
|
|
memset(union_l, 0xff, siglen);
|
|
}
|
|
else
|
|
{
|
|
ptr = LTG_SIGN(_j);
|
|
ALOOPBYTE(siglen)
|
|
union_l[i] |= ptr[i];
|
|
}
|
|
*left++ = j;
|
|
v->spl_nleft++;
|
|
}
|
|
else
|
|
{
|
|
if (LTG_ISALLTRUE(datum_r) || LTG_ISALLTRUE(_j))
|
|
{
|
|
if (!LTG_ISALLTRUE(datum_r))
|
|
memset(union_r, 0xff, siglen);
|
|
}
|
|
else
|
|
{
|
|
ptr = LTG_SIGN(_j);
|
|
ALOOPBYTE(siglen)
|
|
union_r[i] |= ptr[i];
|
|
}
|
|
*right++ = j;
|
|
v->spl_nright++;
|
|
}
|
|
}
|
|
|
|
*right = *left = FirstOffsetNumber;
|
|
|
|
v->spl_ldatum = PointerGetDatum(datum_l);
|
|
v->spl_rdatum = PointerGetDatum(datum_r);
|
|
|
|
PG_RETURN_POINTER(v);
|
|
}
|
|
|
|
static bool
|
|
gist_te(ltree_gist *key, ltree *query, int siglen)
|
|
{
|
|
ltree_level *curq = LTREE_FIRST(query);
|
|
BITVECP sign = LTG_SIGN(key);
|
|
int qlen = query->numlevel;
|
|
unsigned int hv;
|
|
|
|
if (LTG_ISALLTRUE(key))
|
|
return true;
|
|
|
|
while (qlen > 0)
|
|
{
|
|
hv = ltree_crc32_sz(curq->name, curq->len);
|
|
if (!GETBIT(sign, AHASHVAL(hv, siglen)))
|
|
return false;
|
|
curq = LEVEL_NEXT(curq);
|
|
qlen--;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
typedef struct LtreeSignature
|
|
{
|
|
BITVECP sign;
|
|
int siglen;
|
|
} LtreeSignature;
|
|
|
|
static bool
|
|
checkcondition_bit(void *cxt, ITEM *val)
|
|
{
|
|
LtreeSignature *sig = cxt;
|
|
|
|
return (FLG_CANLOOKSIGN(val->flag)) ? GETBIT(sig->sign, AHASHVAL(val->val, sig->siglen)) : true;
|
|
}
|
|
|
|
static bool
|
|
gist_qtxt(ltree_gist *key, ltxtquery *query, int siglen)
|
|
{
|
|
LtreeSignature sig;
|
|
|
|
if (LTG_ISALLTRUE(key))
|
|
return true;
|
|
|
|
sig.sign = LTG_SIGN(key);
|
|
sig.siglen = siglen;
|
|
|
|
return ltree_execute(GETQUERY(query),
|
|
&sig, false,
|
|
checkcondition_bit);
|
|
}
|
|
|
|
static bool
|
|
gist_qe(ltree_gist *key, lquery *query, int siglen)
|
|
{
|
|
lquery_level *curq = LQUERY_FIRST(query);
|
|
BITVECP sign = LTG_SIGN(key);
|
|
int qlen = query->numlevel;
|
|
|
|
if (LTG_ISALLTRUE(key))
|
|
return true;
|
|
|
|
while (qlen > 0)
|
|
{
|
|
if (curq->numvar && LQL_CANLOOKSIGN(curq))
|
|
{
|
|
bool isexist = false;
|
|
int vlen = curq->numvar;
|
|
lquery_variant *curv = LQL_FIRST(curq);
|
|
|
|
while (vlen > 0)
|
|
{
|
|
if (GETBIT(sign, AHASHVAL(curv->val, siglen)))
|
|
{
|
|
isexist = true;
|
|
break;
|
|
}
|
|
curv = LVAR_NEXT(curv);
|
|
vlen--;
|
|
}
|
|
if (!isexist)
|
|
return false;
|
|
}
|
|
|
|
curq = LQL_NEXT(curq);
|
|
qlen--;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
static bool
|
|
_arrq_cons(ltree_gist *key, ArrayType *_query, int siglen)
|
|
{
|
|
lquery *query = (lquery *) ARR_DATA_PTR(_query);
|
|
int num = ArrayGetNItems(ARR_NDIM(_query), ARR_DIMS(_query));
|
|
|
|
if (ARR_NDIM(_query) > 1)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
|
|
errmsg("array must be one-dimensional")));
|
|
if (array_contains_nulls(_query))
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
|
|
errmsg("array must not contain nulls")));
|
|
|
|
while (num > 0)
|
|
{
|
|
if (gist_qe(key, query, siglen))
|
|
return true;
|
|
num--;
|
|
query = (lquery *) NEXTVAL(query);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
Datum
|
|
_ltree_consistent(PG_FUNCTION_ARGS)
|
|
{
|
|
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
|
|
void *query = PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
|
|
StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
|
|
|
|
/* Oid subtype = PG_GETARG_OID(3); */
|
|
bool *recheck = (bool *) PG_GETARG_POINTER(4);
|
|
int siglen = LTREE_GET_ASIGLEN();
|
|
ltree_gist *key = (ltree_gist *) DatumGetPointer(entry->key);
|
|
bool res = false;
|
|
|
|
/* All cases served by this function are inexact */
|
|
*recheck = true;
|
|
|
|
switch (strategy)
|
|
{
|
|
case 10:
|
|
case 11:
|
|
res = gist_te(key, (ltree *) query, siglen);
|
|
break;
|
|
case 12:
|
|
case 13:
|
|
res = gist_qe(key, (lquery *) query, siglen);
|
|
break;
|
|
case 14:
|
|
case 15:
|
|
res = gist_qtxt(key, (ltxtquery *) query, siglen);
|
|
break;
|
|
case 16:
|
|
case 17:
|
|
res = _arrq_cons(key, (ArrayType *) query, siglen);
|
|
break;
|
|
default:
|
|
/* internal error */
|
|
elog(ERROR, "unrecognized StrategyNumber: %d", strategy);
|
|
}
|
|
PG_FREE_IF_COPY(query, 1);
|
|
PG_RETURN_BOOL(res);
|
|
}
|
|
|
|
Datum
|
|
_ltree_gist_options(PG_FUNCTION_ARGS)
|
|
{
|
|
local_relopts *relopts = (local_relopts *) PG_GETARG_POINTER(0);
|
|
|
|
init_local_reloptions(relopts, sizeof(LtreeGistOptions));
|
|
add_local_int_reloption(relopts, "siglen", "signature length",
|
|
LTREE_ASIGLEN_DEFAULT, 1, LTREE_ASIGLEN_MAX,
|
|
offsetof(LtreeGistOptions, siglen));
|
|
|
|
PG_RETURN_VOID();
|
|
}
|