mirror of
https://github.com/postgres/postgres.git
synced 2025-05-13 01:13:08 -04:00
Per my recent proposal(s). Null key datums can now be returned by extractValue and extractQuery functions, and will be stored in the index. Also, placeholder entries are made for indexable items that are NULL or contain no keys according to extractValue. This means that the index is now always complete, having at least one entry for every indexed heap TID, and so we can get rid of the prohibition on full-index scans. A full-index scan is implemented much the same way as partial-match scans were already: we build a bitmap representing all the TIDs found in the index, and then drive the results off that. Also, introduce a concept of a "search mode" that can be requested by extractQuery when the operator requires matching to empty items (this is just as cheap as matching to a single key) or requires a full index scan (which is not so cheap, but it sure beats failing or giving wrong answers). The behavior remains backward compatible for opclasses that don't return any null keys or request a non-default search mode. Using these features, we can now make the GIN index opclass for anyarray behave in a way that matches the actual anyarray operators for &&, <@, @>, and = ... which it failed to do before in assorted corner cases. This commit fixes the core GIN code and ginarrayprocs.c, updates the documentation, and adds some simple regression test cases for the new behaviors using the array operators. The tsearch and contrib GIN opclass support functions still need to be looked over and probably fixed. Another thing I intend to fix separately is that this is pretty inefficient for cases where more than one scan condition needs a full-index search: we'll run duplicate GinScanEntrys, each one of which builds a large bitmap. There is some existing logic to merge duplicate GinScanEntrys but it needs refactoring to make it work for entries belonging to different scan keys. 
Note that most of gin.h has been split out into a new file gin_private.h, so that gin.h doesn't export anything that's not supposed to be used by GIN opclasses or the rest of the backend. I did quite a bit of other code beautification work as well, mostly fixing comments and choosing more appropriate names for things.
581 lines
14 KiB
C
581 lines
14 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* ginutil.c
|
|
 *	  utility routines for the postgres inverted index access method.
|
|
*
|
|
*
|
|
* Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
*
|
|
* IDENTIFICATION
|
|
* src/backend/access/gin/ginutil.c
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
|
|
#include "postgres.h"
|
|
|
|
#include "access/gin_private.h"
|
|
#include "access/reloptions.h"
|
|
#include "catalog/pg_type.h"
|
|
#include "miscadmin.h"
|
|
#include "storage/bufmgr.h"
|
|
#include "storage/freespace.h"
|
|
#include "storage/indexfsm.h"
|
|
#include "storage/lmgr.h"
|
|
|
|
|
|
/*
|
|
* initGinState: fill in an empty GinState struct to describe the index
|
|
*
|
|
* Note: assorted subsidiary data is allocated in the CurrentMemoryContext.
|
|
*/
|
|
void
|
|
initGinState(GinState *state, Relation index)
|
|
{
|
|
TupleDesc origTupdesc = RelationGetDescr(index);
|
|
int i;
|
|
|
|
MemSet(state, 0, sizeof(GinState));
|
|
|
|
state->index = index;
|
|
state->oneCol = (origTupdesc->natts == 1) ? true : false;
|
|
state->origTupdesc = origTupdesc;
|
|
|
|
for (i = 0; i < origTupdesc->natts; i++)
|
|
{
|
|
if (state->oneCol)
|
|
state->tupdesc[i] = state->origTupdesc;
|
|
else
|
|
{
|
|
state->tupdesc[i] = CreateTemplateTupleDesc(2, false);
|
|
|
|
TupleDescInitEntry(state->tupdesc[i], (AttrNumber) 1, NULL,
|
|
INT2OID, -1, 0);
|
|
TupleDescInitEntry(state->tupdesc[i], (AttrNumber) 2, NULL,
|
|
origTupdesc->attrs[i]->atttypid,
|
|
origTupdesc->attrs[i]->atttypmod,
|
|
origTupdesc->attrs[i]->attndims);
|
|
}
|
|
|
|
fmgr_info_copy(&(state->compareFn[i]),
|
|
index_getprocinfo(index, i + 1, GIN_COMPARE_PROC),
|
|
CurrentMemoryContext);
|
|
fmgr_info_copy(&(state->extractValueFn[i]),
|
|
index_getprocinfo(index, i + 1, GIN_EXTRACTVALUE_PROC),
|
|
CurrentMemoryContext);
|
|
fmgr_info_copy(&(state->extractQueryFn[i]),
|
|
index_getprocinfo(index, i + 1, GIN_EXTRACTQUERY_PROC),
|
|
CurrentMemoryContext);
|
|
fmgr_info_copy(&(state->consistentFn[i]),
|
|
index_getprocinfo(index, i + 1, GIN_CONSISTENT_PROC),
|
|
CurrentMemoryContext);
|
|
|
|
/*
|
|
* Check opclass capability to do partial match.
|
|
*/
|
|
if (index_getprocid(index, i + 1, GIN_COMPARE_PARTIAL_PROC) != InvalidOid)
|
|
{
|
|
fmgr_info_copy(&(state->comparePartialFn[i]),
|
|
index_getprocinfo(index, i + 1, GIN_COMPARE_PARTIAL_PROC),
|
|
CurrentMemoryContext);
|
|
|
|
state->canPartialMatch[i] = true;
|
|
}
|
|
else
|
|
{
|
|
state->canPartialMatch[i] = false;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Extract attribute (column) number of stored entry from GIN tuple
|
|
*/
|
|
OffsetNumber
|
|
gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple)
|
|
{
|
|
OffsetNumber colN;
|
|
|
|
if (ginstate->oneCol)
|
|
{
|
|
/* column number is not stored explicitly */
|
|
colN = FirstOffsetNumber;
|
|
}
|
|
else
|
|
{
|
|
Datum res;
|
|
bool isnull;
|
|
|
|
/*
|
|
* First attribute is always int16, so we can safely use any tuple
|
|
* descriptor to obtain first attribute of tuple
|
|
*/
|
|
res = index_getattr(tuple, FirstOffsetNumber, ginstate->tupdesc[0],
|
|
&isnull);
|
|
Assert(!isnull);
|
|
|
|
colN = DatumGetUInt16(res);
|
|
Assert(colN >= FirstOffsetNumber && colN <= ginstate->origTupdesc->natts);
|
|
}
|
|
|
|
return colN;
|
|
}
|
|
|
|
/*
|
|
* Extract stored datum (and possible null category) from GIN tuple
|
|
*/
|
|
Datum
|
|
gintuple_get_key(GinState *ginstate, IndexTuple tuple,
|
|
GinNullCategory *category)
|
|
{
|
|
Datum res;
|
|
bool isnull;
|
|
|
|
if (ginstate->oneCol)
|
|
{
|
|
/*
|
|
* Single column index doesn't store attribute numbers in tuples
|
|
*/
|
|
res = index_getattr(tuple, FirstOffsetNumber, ginstate->origTupdesc,
|
|
&isnull);
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* Since the datum type depends on which index column it's from, we
|
|
* must be careful to use the right tuple descriptor here.
|
|
*/
|
|
OffsetNumber colN = gintuple_get_attrnum(ginstate, tuple);
|
|
|
|
res = index_getattr(tuple, OffsetNumberNext(FirstOffsetNumber),
|
|
ginstate->tupdesc[colN - 1],
|
|
&isnull);
|
|
}
|
|
|
|
if (isnull)
|
|
*category = GinGetNullCategory(tuple, ginstate);
|
|
else
|
|
*category = GIN_CAT_NORM_KEY;
|
|
|
|
return res;
|
|
}
|
|
|
|
/*
|
|
* Allocate a new page (either by recycling, or by extending the index file)
|
|
* The returned buffer is already pinned and exclusive-locked
|
|
* Caller is responsible for initializing the page by calling GinInitBuffer
|
|
*/
|
|
Buffer
|
|
GinNewBuffer(Relation index)
|
|
{
|
|
Buffer buffer;
|
|
bool needLock;
|
|
|
|
/* First, try to get a page from FSM */
|
|
for (;;)
|
|
{
|
|
BlockNumber blkno = GetFreeIndexPage(index);
|
|
|
|
if (blkno == InvalidBlockNumber)
|
|
break;
|
|
|
|
buffer = ReadBuffer(index, blkno);
|
|
|
|
/*
|
|
* We have to guard against the possibility that someone else already
|
|
* recycled this page; the buffer may be locked if so.
|
|
*/
|
|
if (ConditionalLockBuffer(buffer))
|
|
{
|
|
Page page = BufferGetPage(buffer);
|
|
|
|
if (PageIsNew(page))
|
|
return buffer; /* OK to use, if never initialized */
|
|
|
|
if (GinPageIsDeleted(page))
|
|
return buffer; /* OK to use */
|
|
|
|
LockBuffer(buffer, GIN_UNLOCK);
|
|
}
|
|
|
|
/* Can't use it, so release buffer and try again */
|
|
ReleaseBuffer(buffer);
|
|
}
|
|
|
|
/* Must extend the file */
|
|
needLock = !RELATION_IS_LOCAL(index);
|
|
if (needLock)
|
|
LockRelationForExtension(index, ExclusiveLock);
|
|
|
|
buffer = ReadBuffer(index, P_NEW);
|
|
LockBuffer(buffer, GIN_EXCLUSIVE);
|
|
|
|
if (needLock)
|
|
UnlockRelationForExtension(index, ExclusiveLock);
|
|
|
|
return buffer;
|
|
}
|
|
|
|
void
|
|
GinInitPage(Page page, uint32 f, Size pageSize)
|
|
{
|
|
GinPageOpaque opaque;
|
|
|
|
PageInit(page, pageSize, sizeof(GinPageOpaqueData));
|
|
|
|
opaque = GinPageGetOpaque(page);
|
|
memset(opaque, 0, sizeof(GinPageOpaqueData));
|
|
opaque->flags = f;
|
|
opaque->rightlink = InvalidBlockNumber;
|
|
}
|
|
|
|
void
|
|
GinInitBuffer(Buffer b, uint32 f)
|
|
{
|
|
GinInitPage(BufferGetPage(b), f, BufferGetPageSize(b));
|
|
}
|
|
|
|
void
|
|
GinInitMetabuffer(Buffer b)
|
|
{
|
|
GinMetaPageData *metadata;
|
|
Page page = BufferGetPage(b);
|
|
|
|
GinInitPage(page, GIN_META, BufferGetPageSize(b));
|
|
|
|
metadata = GinPageGetMeta(page);
|
|
|
|
metadata->head = metadata->tail = InvalidBlockNumber;
|
|
metadata->tailFreeSize = 0;
|
|
metadata->nPendingPages = 0;
|
|
metadata->nPendingHeapTuples = 0;
|
|
metadata->nTotalPages = 0;
|
|
metadata->nEntryPages = 0;
|
|
metadata->nDataPages = 0;
|
|
metadata->nEntries = 0;
|
|
metadata->ginVersion = GIN_CURRENT_VERSION;
|
|
}
|
|
|
|
/*
|
|
* Compare two keys of the same index column
|
|
*/
|
|
int
|
|
ginCompareEntries(GinState *ginstate, OffsetNumber attnum,
|
|
Datum a, GinNullCategory categorya,
|
|
Datum b, GinNullCategory categoryb)
|
|
{
|
|
/* if not of same null category, sort by that first */
|
|
if (categorya != categoryb)
|
|
return (categorya < categoryb) ? -1 : 1;
|
|
|
|
/* all null items in same category are equal */
|
|
if (categorya != GIN_CAT_NORM_KEY)
|
|
return 0;
|
|
|
|
/* both not null, so safe to call the compareFn */
|
|
return DatumGetInt32(FunctionCall2(&ginstate->compareFn[attnum - 1],
|
|
a, b));
|
|
}
|
|
|
|
/*
|
|
* Compare two keys of possibly different index columns
|
|
*/
|
|
int
|
|
ginCompareAttEntries(GinState *ginstate,
|
|
OffsetNumber attnuma, Datum a, GinNullCategory categorya,
|
|
OffsetNumber attnumb, Datum b, GinNullCategory categoryb)
|
|
{
|
|
/* attribute number is the first sort key */
|
|
if (attnuma != attnumb)
|
|
return (attnuma < attnumb) ? -1 : 1;
|
|
|
|
return ginCompareEntries(ginstate, attnuma, a, categorya, b, categoryb);
|
|
}
|
|
|
|
|
|
/*
|
|
* Support for sorting key datums in ginExtractEntries
|
|
*
|
|
* Note: we only have to worry about null and not-null keys here;
|
|
* ginExtractEntries never generates more than one placeholder null,
|
|
* so it doesn't have to sort those.
|
|
*/
|
|
typedef struct
|
|
{
|
|
Datum datum;
|
|
bool isnull;
|
|
} keyEntryData;
|
|
|
|
typedef struct
|
|
{
|
|
FmgrInfo *cmpDatumFunc;
|
|
bool haveDups;
|
|
} cmpEntriesArg;
|
|
|
|
static int
|
|
cmpEntries(const void *a, const void *b, void *arg)
|
|
{
|
|
const keyEntryData *aa = (const keyEntryData *) a;
|
|
const keyEntryData *bb = (const keyEntryData *) b;
|
|
cmpEntriesArg *data = (cmpEntriesArg *) arg;
|
|
int res;
|
|
|
|
if (aa->isnull)
|
|
{
|
|
if (bb->isnull)
|
|
res = 0; /* NULL "=" NULL */
|
|
else
|
|
res = 1; /* NULL ">" not-NULL */
|
|
}
|
|
else if (bb->isnull)
|
|
res = -1; /* not-NULL "<" NULL */
|
|
else
|
|
res = DatumGetInt32(FunctionCall2(data->cmpDatumFunc,
|
|
aa->datum, bb->datum));
|
|
|
|
/*
|
|
* Detect if we have any duplicates. If there are equal keys, qsort
|
|
* must compare them at some point, else it wouldn't know whether one
|
|
* should go before or after the other.
|
|
*/
|
|
if (res == 0)
|
|
data->haveDups = true;
|
|
|
|
return res;
|
|
}
|
|
|
|
|
|
/*
|
|
* Extract the index key values from an indexable item
|
|
*
|
|
* The resulting key values are sorted, and any duplicates are removed.
|
|
* This avoids generating redundant index entries.
|
|
*/
|
|
Datum *
|
|
ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
|
|
Datum value, bool isNull,
|
|
int32 *nentries, GinNullCategory **categories)
|
|
{
|
|
Datum *entries;
|
|
bool *nullFlags;
|
|
int32 i;
|
|
|
|
/*
|
|
* We don't call the extractValueFn on a null item. Instead generate a
|
|
* placeholder.
|
|
*/
|
|
if (isNull)
|
|
{
|
|
*nentries = 1;
|
|
entries = (Datum *) palloc(sizeof(Datum));
|
|
entries[0] = (Datum) 0;
|
|
*categories = (GinNullCategory *) palloc(sizeof(GinNullCategory));
|
|
(*categories)[0] = GIN_CAT_NULL_ITEM;
|
|
return entries;
|
|
}
|
|
|
|
/* OK, call the opclass's extractValueFn */
|
|
nullFlags = NULL; /* in case extractValue doesn't set it */
|
|
entries = (Datum *)
|
|
DatumGetPointer(FunctionCall3(&ginstate->extractValueFn[attnum - 1],
|
|
value,
|
|
PointerGetDatum(nentries),
|
|
PointerGetDatum(&nullFlags)));
|
|
|
|
/*
|
|
* Generate a placeholder if the item contained no keys.
|
|
*/
|
|
if (entries == NULL || *nentries <= 0)
|
|
{
|
|
*nentries = 1;
|
|
entries = (Datum *) palloc(sizeof(Datum));
|
|
entries[0] = (Datum) 0;
|
|
*categories = (GinNullCategory *) palloc(sizeof(GinNullCategory));
|
|
(*categories)[0] = GIN_CAT_EMPTY_ITEM;
|
|
return entries;
|
|
}
|
|
|
|
/*
|
|
* If the extractValueFn didn't create a nullFlags array, create one,
|
|
* assuming that everything's non-null. Otherwise, run through the
|
|
* array and make sure each value is exactly 0 or 1; this ensures
|
|
* binary compatibility with the GinNullCategory representation.
|
|
*/
|
|
if (nullFlags == NULL)
|
|
nullFlags = (bool *) palloc0(*nentries * sizeof(bool));
|
|
else
|
|
{
|
|
for (i = 0; i < *nentries; i++)
|
|
nullFlags[i] = (nullFlags[i] ? true : false);
|
|
}
|
|
/* now we can use the nullFlags as category codes */
|
|
*categories = (GinNullCategory *) nullFlags;
|
|
|
|
/*
|
|
* If there's more than one key, sort and unique-ify.
|
|
*
|
|
* XXX Using qsort here is notationally painful, and the overhead is
|
|
* pretty bad too. For small numbers of keys it'd likely be better to
|
|
* use a simple insertion sort.
|
|
*/
|
|
if (*nentries > 1)
|
|
{
|
|
keyEntryData *keydata;
|
|
cmpEntriesArg arg;
|
|
|
|
keydata = (keyEntryData *) palloc(*nentries * sizeof(keyEntryData));
|
|
for (i = 0; i < *nentries; i++)
|
|
{
|
|
keydata[i].datum = entries[i];
|
|
keydata[i].isnull = nullFlags[i];
|
|
}
|
|
|
|
arg.cmpDatumFunc = &ginstate->compareFn[attnum - 1];
|
|
arg.haveDups = false;
|
|
qsort_arg(keydata, *nentries, sizeof(keyEntryData),
|
|
cmpEntries, (void *) &arg);
|
|
|
|
if (arg.haveDups)
|
|
{
|
|
/* there are duplicates, must get rid of 'em */
|
|
int32 j;
|
|
|
|
entries[0] = keydata[0].datum;
|
|
nullFlags[0] = keydata[0].isnull;
|
|
j = 1;
|
|
for (i = 1; i < *nentries; i++)
|
|
{
|
|
if (cmpEntries(&keydata[i-1], &keydata[i], &arg) != 0)
|
|
{
|
|
entries[j] = keydata[i].datum;
|
|
nullFlags[j] = keydata[i].isnull;
|
|
j++;
|
|
}
|
|
}
|
|
*nentries = j;
|
|
}
|
|
else
|
|
{
|
|
/* easy, no duplicates */
|
|
for (i = 0; i < *nentries; i++)
|
|
{
|
|
entries[i] = keydata[i].datum;
|
|
nullFlags[i] = keydata[i].isnull;
|
|
}
|
|
}
|
|
|
|
pfree(keydata);
|
|
}
|
|
|
|
return entries;
|
|
}
|
|
|
|
Datum
|
|
ginoptions(PG_FUNCTION_ARGS)
|
|
{
|
|
Datum reloptions = PG_GETARG_DATUM(0);
|
|
bool validate = PG_GETARG_BOOL(1);
|
|
relopt_value *options;
|
|
GinOptions *rdopts;
|
|
int numoptions;
|
|
static const relopt_parse_elt tab[] = {
|
|
{"fastupdate", RELOPT_TYPE_BOOL, offsetof(GinOptions, useFastUpdate)}
|
|
};
|
|
|
|
options = parseRelOptions(reloptions, validate, RELOPT_KIND_GIN,
|
|
&numoptions);
|
|
|
|
/* if none set, we're done */
|
|
if (numoptions == 0)
|
|
PG_RETURN_NULL();
|
|
|
|
rdopts = allocateReloptStruct(sizeof(GinOptions), options, numoptions);
|
|
|
|
fillRelOptions((void *) rdopts, sizeof(GinOptions), options, numoptions,
|
|
validate, tab, lengthof(tab));
|
|
|
|
pfree(options);
|
|
|
|
PG_RETURN_BYTEA_P(rdopts);
|
|
}
|
|
|
|
/*
|
|
* Fetch index's statistical data into *stats
|
|
*
|
|
* Note: in the result, nPendingPages can be trusted to be up-to-date,
|
|
* as can ginVersion; but the other fields are as of the last VACUUM.
|
|
*/
|
|
void
|
|
ginGetStats(Relation index, GinStatsData *stats)
|
|
{
|
|
Buffer metabuffer;
|
|
Page metapage;
|
|
GinMetaPageData *metadata;
|
|
|
|
metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
|
|
LockBuffer(metabuffer, GIN_SHARE);
|
|
metapage = BufferGetPage(metabuffer);
|
|
metadata = GinPageGetMeta(metapage);
|
|
|
|
stats->nPendingPages = metadata->nPendingPages;
|
|
stats->nTotalPages = metadata->nTotalPages;
|
|
stats->nEntryPages = metadata->nEntryPages;
|
|
stats->nDataPages = metadata->nDataPages;
|
|
stats->nEntries = metadata->nEntries;
|
|
stats->ginVersion = metadata->ginVersion;
|
|
|
|
UnlockReleaseBuffer(metabuffer);
|
|
}
|
|
|
|
/*
|
|
* Write the given statistics to the index's metapage
|
|
*
|
|
* Note: nPendingPages and ginVersion are *not* copied over
|
|
*/
|
|
void
|
|
ginUpdateStats(Relation index, const GinStatsData *stats)
|
|
{
|
|
Buffer metabuffer;
|
|
Page metapage;
|
|
GinMetaPageData *metadata;
|
|
|
|
metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
|
|
LockBuffer(metabuffer, GIN_EXCLUSIVE);
|
|
metapage = BufferGetPage(metabuffer);
|
|
metadata = GinPageGetMeta(metapage);
|
|
|
|
START_CRIT_SECTION();
|
|
|
|
metadata->nTotalPages = stats->nTotalPages;
|
|
metadata->nEntryPages = stats->nEntryPages;
|
|
metadata->nDataPages = stats->nDataPages;
|
|
metadata->nEntries = stats->nEntries;
|
|
|
|
MarkBufferDirty(metabuffer);
|
|
|
|
if (RelationNeedsWAL(index))
|
|
{
|
|
XLogRecPtr recptr;
|
|
ginxlogUpdateMeta data;
|
|
XLogRecData rdata;
|
|
|
|
data.node = index->rd_node;
|
|
data.ntuples = 0;
|
|
data.newRightlink = data.prevTail = InvalidBlockNumber;
|
|
memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));
|
|
|
|
rdata.buffer = InvalidBuffer;
|
|
rdata.data = (char *) &data;
|
|
rdata.len = sizeof(ginxlogUpdateMeta);
|
|
rdata.next = NULL;
|
|
|
|
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE, &rdata);
|
|
PageSetLSN(metapage, recptr);
|
|
PageSetTLI(metapage, ThisTimeLineID);
|
|
}
|
|
|
|
UnlockReleaseBuffer(metabuffer);
|
|
|
|
END_CRIT_SECTION();
|
|
}
|