mirror of
https://github.com/postgres/postgres.git
synced 2025-05-13 01:13:08 -04:00
amcheck: Add gin_index_check() to verify GIN index
Adds a new function, validating two kinds of invariants on a GIN index: - parent-child consistency: Paths in a GIN graph have to contain consistent keys. Tuples on parent pages consistently include tuples from child pages; parent tuples do not require any adjustments. - balanced-tree / graph: Each internal page has at least one downlink, and can reference either only leaf pages or only internal pages. The GIN verification is based on work by Grigory Kryachko, reworked by Heikki Linnakangas and with various improvements by Andrey Borodin. Investigation and fixes for multiple bugs by Kirill Reshke. Author: Grigory Kryachko <GSKryachko@gmail.com> Author: Heikki Linnakangas <hlinnaka@iki.fi> Author: Andrey Borodin <amborodin@acm.org> Reviewed-By: José Villanova <jose.arthur@gmail.com> Reviewed-By: Aleksander Alekseev <aleksander@timescale.com> Reviewed-By: Nikolay Samokhvalov <samokhvalov@gmail.com> Reviewed-By: Andres Freund <andres@anarazel.de> Reviewed-By: Tomas Vondra <tomas.vondra@enterprisedb.com> Reviewed-By: Kirill Reshke <reshkekirill@gmail.com> Reviewed-By: Mark Dilger <mark.dilger@enterprisedb.com> Reviewed-By: Peter Geoghegan <pg@bowt.ie> Discussion: https://postgr.es/m/45AC9B0A-2B45-40EE-B08F-BDCF5739D1E1%40yandex-team.ru
This commit is contained in:
parent
53a2a1564a
commit
14ffaece0f
@ -4,14 +4,16 @@ MODULE_big = amcheck
|
||||
OBJS = \
|
||||
$(WIN32RES) \
|
||||
verify_common.o \
|
||||
verify_gin.o \
|
||||
verify_heapam.o \
|
||||
verify_nbtree.o
|
||||
|
||||
EXTENSION = amcheck
|
||||
DATA = amcheck--1.3--1.4.sql amcheck--1.2--1.3.sql amcheck--1.1--1.2.sql amcheck--1.0--1.1.sql amcheck--1.0.sql
|
||||
DATA = amcheck--1.2--1.3.sql amcheck--1.1--1.2.sql amcheck--1.0--1.1.sql amcheck--1.0.sql \
|
||||
amcheck--1.3--1.4.sql amcheck--1.4--1.5.sql
|
||||
PGFILEDESC = "amcheck - function for verifying relation integrity"
|
||||
|
||||
REGRESS = check check_btree check_heap
|
||||
REGRESS = check check_btree check_gin check_heap
|
||||
|
||||
EXTRA_INSTALL = contrib/pg_walinspect
|
||||
TAP_TESTS = 1
|
||||
|
14
contrib/amcheck/amcheck--1.4--1.5.sql
Normal file
14
contrib/amcheck/amcheck--1.4--1.5.sql
Normal file
@ -0,0 +1,14 @@
|
||||
/* contrib/amcheck/amcheck--1.4--1.5.sql */
|
||||
|
||||
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "ALTER EXTENSION amcheck UPDATE TO '1.5'" to load this file. \quit
|
||||
|
||||
|
||||
-- gin_index_check()
|
||||
--
|
||||
CREATE FUNCTION gin_index_check(index regclass)
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'gin_index_check'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
REVOKE ALL ON FUNCTION gin_index_check(regclass) FROM PUBLIC;
|
@ -1,5 +1,5 @@
|
||||
# amcheck extension
|
||||
comment = 'functions for verifying relation integrity'
|
||||
default_version = '1.4'
|
||||
default_version = '1.5'
|
||||
module_pathname = '$libdir/amcheck'
|
||||
relocatable = true
|
||||
|
64
contrib/amcheck/expected/check_gin.out
Normal file
64
contrib/amcheck/expected/check_gin.out
Normal file
@ -0,0 +1,64 @@
|
||||
-- Test of index bulk load
|
||||
SELECT setseed(1);
|
||||
setseed
|
||||
---------
|
||||
|
||||
(1 row)
|
||||
|
||||
CREATE TABLE "gin_check"("Column1" int[]);
|
||||
-- posting trees (frequently used entries)
|
||||
INSERT INTO gin_check select array_agg(round(random()*255) ) from generate_series(1, 100000) as i group by i % 10000;
|
||||
-- posting leaves (sparse entries)
|
||||
INSERT INTO gin_check select array_agg(255 + round(random()*100)) from generate_series(1, 100) as i group by i % 100;
|
||||
CREATE INDEX gin_check_idx on "gin_check" USING GIN("Column1");
|
||||
SELECT gin_index_check('gin_check_idx');
|
||||
gin_index_check
|
||||
-----------------
|
||||
|
||||
(1 row)
|
||||
|
||||
-- cleanup
|
||||
DROP TABLE gin_check;
|
||||
-- Test index inserts
|
||||
SELECT setseed(1);
|
||||
setseed
|
||||
---------
|
||||
|
||||
(1 row)
|
||||
|
||||
CREATE TABLE "gin_check"("Column1" int[]);
|
||||
CREATE INDEX gin_check_idx on "gin_check" USING GIN("Column1");
|
||||
ALTER INDEX gin_check_idx SET (fastupdate = false);
|
||||
-- posting trees
|
||||
INSERT INTO gin_check select array_agg(round(random()*255) ) from generate_series(1, 100000) as i group by i % 10000;
|
||||
-- posting leaves
|
||||
INSERT INTO gin_check select array_agg(100 + round(random()*255)) from generate_series(1, 100) as i group by i % 100;
|
||||
SELECT gin_index_check('gin_check_idx');
|
||||
gin_index_check
|
||||
-----------------
|
||||
|
||||
(1 row)
|
||||
|
||||
-- cleanup
|
||||
DROP TABLE gin_check;
|
||||
-- Test GIN over text array
|
||||
SELECT setseed(1);
|
||||
setseed
|
||||
---------
|
||||
|
||||
(1 row)
|
||||
|
||||
CREATE TABLE "gin_check_text_array"("Column1" text[]);
|
||||
-- posting trees
|
||||
INSERT INTO gin_check_text_array select array_agg(md5(round(random()*300)::text)::text) from generate_series(1, 100000) as i group by i % 10000;
|
||||
-- posting leaves
|
||||
INSERT INTO gin_check_text_array select array_agg(md5(round(random()*300 + 300)::text)::text) from generate_series(1, 10000) as i group by i % 100;
|
||||
CREATE INDEX gin_check_text_array_idx on "gin_check_text_array" USING GIN("Column1");
|
||||
SELECT gin_index_check('gin_check_text_array_idx');
|
||||
gin_index_check
|
||||
-----------------
|
||||
|
||||
(1 row)
|
||||
|
||||
-- cleanup
|
||||
DROP TABLE gin_check_text_array;
|
@ -2,6 +2,7 @@
|
||||
|
||||
amcheck_sources = files(
|
||||
'verify_common.c',
|
||||
'verify_gin.c',
|
||||
'verify_heapam.c',
|
||||
'verify_nbtree.c',
|
||||
)
|
||||
@ -25,6 +26,7 @@ install_data(
|
||||
'amcheck--1.1--1.2.sql',
|
||||
'amcheck--1.2--1.3.sql',
|
||||
'amcheck--1.3--1.4.sql',
|
||||
'amcheck--1.4--1.5.sql',
|
||||
kwargs: contrib_data_args,
|
||||
)
|
||||
|
||||
@ -36,6 +38,7 @@ tests += {
|
||||
'sql': [
|
||||
'check',
|
||||
'check_btree',
|
||||
'check_gin',
|
||||
'check_heap',
|
||||
],
|
||||
},
|
||||
|
40
contrib/amcheck/sql/check_gin.sql
Normal file
40
contrib/amcheck/sql/check_gin.sql
Normal file
@ -0,0 +1,40 @@
|
||||
-- Test of index bulk load
|
||||
SELECT setseed(1);
|
||||
CREATE TABLE "gin_check"("Column1" int[]);
|
||||
-- posting trees (frequently used entries)
|
||||
INSERT INTO gin_check select array_agg(round(random()*255) ) from generate_series(1, 100000) as i group by i % 10000;
|
||||
-- posting leaves (sparse entries)
|
||||
INSERT INTO gin_check select array_agg(255 + round(random()*100)) from generate_series(1, 100) as i group by i % 100;
|
||||
CREATE INDEX gin_check_idx on "gin_check" USING GIN("Column1");
|
||||
SELECT gin_index_check('gin_check_idx');
|
||||
|
||||
-- cleanup
|
||||
DROP TABLE gin_check;
|
||||
|
||||
-- Test index inserts
|
||||
SELECT setseed(1);
|
||||
CREATE TABLE "gin_check"("Column1" int[]);
|
||||
CREATE INDEX gin_check_idx on "gin_check" USING GIN("Column1");
|
||||
ALTER INDEX gin_check_idx SET (fastupdate = false);
|
||||
-- posting trees
|
||||
INSERT INTO gin_check select array_agg(round(random()*255) ) from generate_series(1, 100000) as i group by i % 10000;
|
||||
-- posting leaves
|
||||
INSERT INTO gin_check select array_agg(100 + round(random()*255)) from generate_series(1, 100) as i group by i % 100;
|
||||
|
||||
SELECT gin_index_check('gin_check_idx');
|
||||
|
||||
-- cleanup
|
||||
DROP TABLE gin_check;
|
||||
|
||||
-- Test GIN over text array
|
||||
SELECT setseed(1);
|
||||
CREATE TABLE "gin_check_text_array"("Column1" text[]);
|
||||
-- posting trees
|
||||
INSERT INTO gin_check_text_array select array_agg(md5(round(random()*300)::text)::text) from generate_series(1, 100000) as i group by i % 10000;
|
||||
-- posting leaves
|
||||
INSERT INTO gin_check_text_array select array_agg(md5(round(random()*300 + 300)::text)::text) from generate_series(1, 10000) as i group by i % 100;
|
||||
CREATE INDEX gin_check_text_array_idx on "gin_check_text_array" USING GIN("Column1");
|
||||
SELECT gin_index_check('gin_check_text_array_idx');
|
||||
|
||||
-- cleanup
|
||||
DROP TABLE gin_check_text_array;
|
798
contrib/amcheck/verify_gin.c
Normal file
798
contrib/amcheck/verify_gin.c
Normal file
@ -0,0 +1,798 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* verify_gin.c
|
||||
* Verifies the integrity of GIN indexes based on invariants.
|
||||
*
|
||||
*
|
||||
* GIN index verification checks a number of invariants:
|
||||
*
|
||||
* - consistency: Paths in GIN graph have to contain consistent keys: tuples
|
||||
* on parent pages consistently include tuples from children pages.
|
||||
*
|
||||
* - graph invariants: Each internal page must have at least one downlink, and
|
||||
* can reference either only leaf pages or only internal pages.
|
||||
*
|
||||
*
|
||||
* Copyright (c) 2016-2025, PostgreSQL Global Development Group
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* contrib/amcheck/verify_gin.c
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include "access/gin_private.h"
|
||||
#include "access/nbtree.h"
|
||||
#include "catalog/pg_am.h"
|
||||
#include "utils/memutils.h"
|
||||
#include "utils/rel.h"
|
||||
#include "verify_common.h"
|
||||
#include "string.h"
|
||||
|
||||
/*
|
||||
* GinScanItem represents one item of depth-first scan of the index.
|
||||
*/
|
||||
typedef struct GinScanItem
|
||||
{
|
||||
int depth;
|
||||
IndexTuple parenttup;
|
||||
BlockNumber parentblk;
|
||||
XLogRecPtr parentlsn;
|
||||
BlockNumber blkno;
|
||||
struct GinScanItem *next;
|
||||
} GinScanItem;
|
||||
|
||||
/*
|
||||
* GinPostingTreeScanItem represents one item of a depth-first posting tree scan.
|
||||
*/
|
||||
typedef struct GinPostingTreeScanItem
|
||||
{
|
||||
int depth;
|
||||
ItemPointerData parentkey;
|
||||
BlockNumber parentblk;
|
||||
BlockNumber blkno;
|
||||
struct GinPostingTreeScanItem *next;
|
||||
} GinPostingTreeScanItem;
|
||||
|
||||
|
||||
PG_FUNCTION_INFO_V1(gin_index_check);
|
||||
|
||||
static void gin_check_parent_keys_consistency(Relation rel,
|
||||
Relation heaprel,
|
||||
void *callback_state, bool readonly);
|
||||
static void check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo);
|
||||
static IndexTuple gin_refind_parent(Relation rel,
|
||||
BlockNumber parentblkno,
|
||||
BlockNumber childblkno,
|
||||
BufferAccessStrategy strategy);
|
||||
static ItemId PageGetItemIdCareful(Relation rel, BlockNumber block, Page page,
|
||||
OffsetNumber offset);
|
||||
|
||||
/*
|
||||
* gin_index_check(index regclass)
|
||||
*
|
||||
* Verify integrity of GIN index.
|
||||
*
|
||||
* Acquires AccessShareLock on heap & index relations.
|
||||
*/
|
||||
Datum
|
||||
gin_index_check(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Oid indrelid = PG_GETARG_OID(0);
|
||||
|
||||
amcheck_lock_relation_and_check(indrelid,
|
||||
GIN_AM_OID,
|
||||
gin_check_parent_keys_consistency,
|
||||
AccessShareLock,
|
||||
NULL);
|
||||
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
/*
|
||||
* Read item pointers from leaf entry tuple.
|
||||
*
|
||||
* Returns a palloc'd array of ItemPointers. The number of items is returned
|
||||
* in *nitems.
|
||||
*/
|
||||
static ItemPointer
|
||||
ginReadTupleWithoutState(IndexTuple itup, int *nitems)
|
||||
{
|
||||
Pointer ptr = GinGetPosting(itup);
|
||||
int nipd = GinGetNPosting(itup);
|
||||
ItemPointer ipd;
|
||||
int ndecoded;
|
||||
|
||||
if (GinItupIsCompressed(itup))
|
||||
{
|
||||
if (nipd > 0)
|
||||
{
|
||||
ipd = ginPostingListDecode((GinPostingList *) ptr, &ndecoded);
|
||||
if (nipd != ndecoded)
|
||||
elog(ERROR, "number of items mismatch in GIN entry tuple, %d in tuple header, %d decoded",
|
||||
nipd, ndecoded);
|
||||
}
|
||||
else
|
||||
ipd = palloc(0);
|
||||
}
|
||||
else
|
||||
{
|
||||
ipd = (ItemPointer) palloc(sizeof(ItemPointerData) * nipd);
|
||||
memcpy(ipd, ptr, sizeof(ItemPointerData) * nipd);
|
||||
}
|
||||
*nitems = nipd;
|
||||
return ipd;
|
||||
}
|
||||
|
||||
/*
|
||||
* Scans through a posting tree (given by the root), and verifies that the keys
|
||||
* on a child keys are consistent with the parent.
|
||||
*
|
||||
* Allocates a separate memory context and scans through posting tree graph.
|
||||
*/
|
||||
static void
|
||||
gin_check_posting_tree_parent_keys_consistency(Relation rel, BlockNumber posting_tree_root)
|
||||
{
|
||||
BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
|
||||
GinPostingTreeScanItem *stack;
|
||||
MemoryContext mctx;
|
||||
MemoryContext oldcontext;
|
||||
|
||||
int leafdepth;
|
||||
|
||||
mctx = AllocSetContextCreate(CurrentMemoryContext,
|
||||
"posting tree check context",
|
||||
ALLOCSET_DEFAULT_SIZES);
|
||||
oldcontext = MemoryContextSwitchTo(mctx);
|
||||
|
||||
/*
|
||||
* We don't know the height of the tree yet, but as soon as we encounter a
|
||||
* leaf page, we will set 'leafdepth' to its depth.
|
||||
*/
|
||||
leafdepth = -1;
|
||||
|
||||
/* Start the scan at the root page */
|
||||
stack = (GinPostingTreeScanItem *) palloc0(sizeof(GinPostingTreeScanItem));
|
||||
stack->depth = 0;
|
||||
ItemPointerSetInvalid(&stack->parentkey);
|
||||
stack->parentblk = InvalidBlockNumber;
|
||||
stack->blkno = posting_tree_root;
|
||||
|
||||
elog(DEBUG3, "processing posting tree at blk %u", posting_tree_root);
|
||||
|
||||
while (stack)
|
||||
{
|
||||
GinPostingTreeScanItem *stack_next;
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
OffsetNumber i,
|
||||
maxoff;
|
||||
BlockNumber rightlink;
|
||||
|
||||
CHECK_FOR_INTERRUPTS();
|
||||
|
||||
buffer = ReadBufferExtended(rel, MAIN_FORKNUM, stack->blkno,
|
||||
RBM_NORMAL, strategy);
|
||||
LockBuffer(buffer, GIN_SHARE);
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
|
||||
Assert(GinPageIsData(page));
|
||||
|
||||
/* Check that the tree has the same height in all branches */
|
||||
if (GinPageIsLeaf(page))
|
||||
{
|
||||
ItemPointerData minItem;
|
||||
int nlist;
|
||||
ItemPointerData *list;
|
||||
char tidrange_buf[MAXPGPATH];
|
||||
|
||||
ItemPointerSetMin(&minItem);
|
||||
|
||||
elog(DEBUG1, "page blk: %u, type leaf", stack->blkno);
|
||||
|
||||
if (leafdepth == -1)
|
||||
leafdepth = stack->depth;
|
||||
else if (stack->depth != leafdepth)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INDEX_CORRUPTED),
|
||||
errmsg("index \"%s\": internal pages traversal encountered leaf page unexpectedly on block %u",
|
||||
RelationGetRelationName(rel), stack->blkno)));
|
||||
list = GinDataLeafPageGetItems(page, &nlist, minItem);
|
||||
|
||||
if (nlist > 0)
|
||||
snprintf(tidrange_buf, sizeof(tidrange_buf),
|
||||
"%d tids (%u, %u) - (%u, %u)",
|
||||
nlist,
|
||||
ItemPointerGetBlockNumberNoCheck(&list[0]),
|
||||
ItemPointerGetOffsetNumberNoCheck(&list[0]),
|
||||
ItemPointerGetBlockNumberNoCheck(&list[nlist - 1]),
|
||||
ItemPointerGetOffsetNumberNoCheck(&list[nlist - 1]));
|
||||
else
|
||||
snprintf(tidrange_buf, sizeof(tidrange_buf), "0 tids");
|
||||
|
||||
if (stack->parentblk != InvalidBlockNumber)
|
||||
elog(DEBUG3, "blk %u: parent %u highkey (%u, %u), %s",
|
||||
stack->blkno,
|
||||
stack->parentblk,
|
||||
ItemPointerGetBlockNumberNoCheck(&stack->parentkey),
|
||||
ItemPointerGetOffsetNumberNoCheck(&stack->parentkey),
|
||||
tidrange_buf);
|
||||
else
|
||||
elog(DEBUG3, "blk %u: root leaf, %s",
|
||||
stack->blkno,
|
||||
tidrange_buf);
|
||||
|
||||
if (stack->parentblk != InvalidBlockNumber &&
|
||||
ItemPointerGetOffsetNumberNoCheck(&stack->parentkey) != InvalidOffsetNumber &&
|
||||
nlist > 0 && ItemPointerCompare(&stack->parentkey, &list[nlist - 1]) < 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INDEX_CORRUPTED),
|
||||
errmsg("index \"%s\": tid exceeds parent's high key in postingTree leaf on block %u",
|
||||
RelationGetRelationName(rel), stack->blkno)));
|
||||
}
|
||||
else
|
||||
{
|
||||
LocationIndex pd_lower;
|
||||
ItemPointerData bound;
|
||||
int lowersize;
|
||||
|
||||
/*
|
||||
* Check that tuples in each page are properly ordered and
|
||||
* consistent with parent high key
|
||||
*/
|
||||
maxoff = GinPageGetOpaque(page)->maxoff;
|
||||
rightlink = GinPageGetOpaque(page)->rightlink;
|
||||
|
||||
elog(DEBUG1, "page blk: %u, type data, maxoff %d", stack->blkno, maxoff);
|
||||
|
||||
if (stack->parentblk != InvalidBlockNumber)
|
||||
elog(DEBUG3, "blk %u: internal posting tree page with %u items, parent %u highkey (%u, %u)",
|
||||
stack->blkno, maxoff, stack->parentblk,
|
||||
ItemPointerGetBlockNumberNoCheck(&stack->parentkey),
|
||||
ItemPointerGetOffsetNumberNoCheck(&stack->parentkey));
|
||||
else
|
||||
elog(DEBUG3, "blk %u: root internal posting tree page with %u items",
|
||||
stack->blkno, maxoff);
|
||||
|
||||
/*
|
||||
* A GIN posting tree internal page stores PostingItems in the
|
||||
* 'lower' part of the page. The 'upper' part is unused. The
|
||||
* number of elements is stored in the opaque area (maxoff). Make
|
||||
* sure the size of the 'lower' part agrees with 'maxoff'
|
||||
*
|
||||
* We didn't set pd_lower until PostgreSQL version 9.4, so if this
|
||||
* check fails, it could also be because the index was
|
||||
* binary-upgraded from an earlier version. That was a long time
|
||||
* ago, though, so let's warn if it doesn't match.
|
||||
*/
|
||||
pd_lower = ((PageHeader) page)->pd_lower;
|
||||
lowersize = pd_lower - MAXALIGN(SizeOfPageHeaderData);
|
||||
if ((lowersize - MAXALIGN(sizeof(ItemPointerData))) / sizeof(PostingItem) != maxoff)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INDEX_CORRUPTED),
|
||||
errmsg("index \"%s\" has unexpected pd_lower %u in posting tree block %u with maxoff %u)",
|
||||
RelationGetRelationName(rel), pd_lower, stack->blkno, maxoff)));
|
||||
|
||||
/*
|
||||
* Before the PostingItems, there's one ItemPointerData in the
|
||||
* 'lower' part that stores the page's high key.
|
||||
*/
|
||||
bound = *GinDataPageGetRightBound(page);
|
||||
|
||||
/*
|
||||
* Gin page right bound has a sane value only when not a highkey on
|
||||
* the rightmost page (at a given level). For the rightmost page does
|
||||
* not store the highkey explicitly, and the value is infinity.
|
||||
*/
|
||||
if (ItemPointerIsValid(&stack->parentkey) &&
|
||||
rightlink != InvalidBlockNumber &&
|
||||
!ItemPointerEquals(&stack->parentkey, &bound))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INDEX_CORRUPTED),
|
||||
errmsg("index \"%s\": posting tree page's high key (%u, %u) doesn't match the downlink on block %u (parent blk %u, key (%u, %u))",
|
||||
RelationGetRelationName(rel),
|
||||
ItemPointerGetBlockNumberNoCheck(&bound),
|
||||
ItemPointerGetOffsetNumberNoCheck(&bound),
|
||||
stack->blkno, stack->parentblk,
|
||||
ItemPointerGetBlockNumberNoCheck(&stack->parentkey),
|
||||
ItemPointerGetOffsetNumberNoCheck(&stack->parentkey))));
|
||||
|
||||
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
|
||||
{
|
||||
GinPostingTreeScanItem *ptr;
|
||||
PostingItem *posting_item = GinDataPageGetPostingItem(page, i);
|
||||
|
||||
/* ItemPointerGetOffsetNumber expects a valid pointer */
|
||||
if (!(i == maxoff &&
|
||||
rightlink == InvalidBlockNumber))
|
||||
elog(DEBUG3, "key (%u, %u) -> %u",
|
||||
ItemPointerGetBlockNumber(&posting_item->key),
|
||||
ItemPointerGetOffsetNumber(&posting_item->key),
|
||||
BlockIdGetBlockNumber(&posting_item->child_blkno));
|
||||
else
|
||||
elog(DEBUG3, "key (%u, %u) -> %u",
|
||||
0, 0, BlockIdGetBlockNumber(&posting_item->child_blkno));
|
||||
|
||||
if (i == maxoff && rightlink == InvalidBlockNumber)
|
||||
{
|
||||
/*
|
||||
* The rightmost item in the tree level has (0, 0) as the
|
||||
* key
|
||||
*/
|
||||
if (ItemPointerGetBlockNumberNoCheck(&posting_item->key) != 0 ||
|
||||
ItemPointerGetOffsetNumberNoCheck(&posting_item->key) != 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INDEX_CORRUPTED),
|
||||
errmsg("index \"%s\": rightmost posting tree page (blk %u) has unexpected last key (%u, %u)",
|
||||
RelationGetRelationName(rel),
|
||||
stack->blkno,
|
||||
ItemPointerGetBlockNumberNoCheck(&posting_item->key),
|
||||
ItemPointerGetOffsetNumberNoCheck(&posting_item->key))));
|
||||
}
|
||||
else if (i != FirstOffsetNumber)
|
||||
{
|
||||
PostingItem *previous_posting_item = GinDataPageGetPostingItem(page, i - 1);
|
||||
|
||||
if (ItemPointerCompare(&posting_item->key, &previous_posting_item->key) < 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INDEX_CORRUPTED),
|
||||
errmsg("index \"%s\" has wrong tuple order in posting tree, block %u, offset %u",
|
||||
RelationGetRelationName(rel), stack->blkno, i)));
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if this tuple is consistent with the downlink in the
|
||||
* parent.
|
||||
*/
|
||||
if (stack->parentblk != InvalidBlockNumber && i == maxoff &&
|
||||
ItemPointerCompare(&stack->parentkey, &posting_item->key) < 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INDEX_CORRUPTED),
|
||||
errmsg("index \"%s\": posting item exceeds parent's high key in postingTree internal page on block %u offset %u",
|
||||
RelationGetRelationName(rel),
|
||||
stack->blkno, i)));
|
||||
|
||||
/* This is an internal page, recurse into the child. */
|
||||
ptr = (GinPostingTreeScanItem *) palloc(sizeof(GinPostingTreeScanItem));
|
||||
ptr->depth = stack->depth + 1;
|
||||
|
||||
/*
|
||||
* Set rightmost parent key to invalid iterm pointer. Its
|
||||
* value is 'Infinity' and not explicitly stored.
|
||||
*/
|
||||
if (rightlink == InvalidBlockNumber)
|
||||
ItemPointerSetInvalid(&ptr->parentkey);
|
||||
else
|
||||
ptr->parentkey = posting_item->key;
|
||||
|
||||
ptr->parentblk = stack->blkno;
|
||||
ptr->blkno = BlockIdGetBlockNumber(&posting_item->child_blkno);
|
||||
ptr->next = stack->next;
|
||||
stack->next = ptr;
|
||||
}
|
||||
}
|
||||
LockBuffer(buffer, GIN_UNLOCK);
|
||||
ReleaseBuffer(buffer);
|
||||
|
||||
/* Step to next item in the queue */
|
||||
stack_next = stack->next;
|
||||
pfree(stack);
|
||||
stack = stack_next;
|
||||
}
|
||||
|
||||
MemoryContextSwitchTo(oldcontext);
|
||||
MemoryContextDelete(mctx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Main entry point for GIN checks.
|
||||
*
|
||||
* Allocates memory context and scans through the whole GIN graph.
|
||||
*/
|
||||
static void
|
||||
gin_check_parent_keys_consistency(Relation rel,
|
||||
Relation heaprel,
|
||||
void *callback_state,
|
||||
bool readonly)
|
||||
{
|
||||
BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
|
||||
GinScanItem *stack;
|
||||
MemoryContext mctx;
|
||||
MemoryContext oldcontext;
|
||||
GinState state;
|
||||
int leafdepth;
|
||||
|
||||
mctx = AllocSetContextCreate(CurrentMemoryContext,
|
||||
"amcheck consistency check context",
|
||||
ALLOCSET_DEFAULT_SIZES);
|
||||
oldcontext = MemoryContextSwitchTo(mctx);
|
||||
initGinState(&state, rel);
|
||||
|
||||
/*
|
||||
* We don't know the height of the tree yet, but as soon as we encounter a
|
||||
* leaf page, we will set 'leafdepth' to its depth.
|
||||
*/
|
||||
leafdepth = -1;
|
||||
|
||||
/* Start the scan at the root page */
|
||||
stack = (GinScanItem *) palloc0(sizeof(GinScanItem));
|
||||
stack->depth = 0;
|
||||
stack->parenttup = NULL;
|
||||
stack->parentblk = InvalidBlockNumber;
|
||||
stack->parentlsn = InvalidXLogRecPtr;
|
||||
stack->blkno = GIN_ROOT_BLKNO;
|
||||
|
||||
while (stack)
|
||||
{
|
||||
GinScanItem *stack_next;
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
OffsetNumber i,
|
||||
maxoff,
|
||||
prev_attnum;
|
||||
XLogRecPtr lsn;
|
||||
IndexTuple prev_tuple;
|
||||
BlockNumber rightlink;
|
||||
|
||||
CHECK_FOR_INTERRUPTS();
|
||||
|
||||
buffer = ReadBufferExtended(rel, MAIN_FORKNUM, stack->blkno,
|
||||
RBM_NORMAL, strategy);
|
||||
LockBuffer(buffer, GIN_SHARE);
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
lsn = BufferGetLSNAtomic(buffer);
|
||||
maxoff = PageGetMaxOffsetNumber(page);
|
||||
rightlink = GinPageGetOpaque(page)->rightlink;
|
||||
|
||||
/* Do basic sanity checks on the page headers */
|
||||
check_index_page(rel, buffer, stack->blkno);
|
||||
|
||||
elog(DEBUG3, "processing entry tree page at blk %u, maxoff: %u", stack->blkno, maxoff);
|
||||
|
||||
/*
|
||||
* It's possible that the page was split since we looked at the
|
||||
* parent, so that we didn't missed the downlink of the right sibling
|
||||
* when we scanned the parent. If so, add the right sibling to the
|
||||
* stack now.
|
||||
*/
|
||||
if (stack->parenttup != NULL)
|
||||
{
|
||||
GinNullCategory parent_key_category;
|
||||
Datum parent_key = gintuple_get_key(&state,
|
||||
stack->parenttup,
|
||||
&parent_key_category);
|
||||
ItemId iid = PageGetItemIdCareful(rel, stack->blkno,
|
||||
page, maxoff);
|
||||
IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
|
||||
OffsetNumber attnum = gintuple_get_attrnum(&state, idxtuple);
|
||||
GinNullCategory page_max_key_category;
|
||||
Datum page_max_key = gintuple_get_key(&state, idxtuple, &page_max_key_category);
|
||||
|
||||
if (rightlink != InvalidBlockNumber &&
|
||||
ginCompareEntries(&state, attnum, page_max_key,
|
||||
page_max_key_category, parent_key,
|
||||
parent_key_category) > 0)
|
||||
{
|
||||
/* split page detected, install right link to the stack */
|
||||
GinScanItem *ptr;
|
||||
|
||||
elog(DEBUG3, "split detected for blk: %u, parent blk: %u", stack->blkno, stack->parentblk);
|
||||
|
||||
ptr = (GinScanItem *) palloc(sizeof(GinScanItem));
|
||||
ptr->depth = stack->depth;
|
||||
ptr->parenttup = CopyIndexTuple(stack->parenttup);
|
||||
ptr->parentblk = stack->parentblk;
|
||||
ptr->parentlsn = stack->parentlsn;
|
||||
ptr->blkno = rightlink;
|
||||
ptr->next = stack->next;
|
||||
stack->next = ptr;
|
||||
}
|
||||
}
|
||||
|
||||
/* Check that the tree has the same height in all branches */
|
||||
if (GinPageIsLeaf(page))
|
||||
{
|
||||
if (leafdepth == -1)
|
||||
leafdepth = stack->depth;
|
||||
else if (stack->depth != leafdepth)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INDEX_CORRUPTED),
|
||||
errmsg("index \"%s\": internal pages traversal encountered leaf page unexpectedly on block %u",
|
||||
RelationGetRelationName(rel), stack->blkno)));
|
||||
}
|
||||
|
||||
/*
|
||||
* Check that tuples in each page are properly ordered and consistent
|
||||
* with parent high key
|
||||
*/
|
||||
prev_tuple = NULL;
|
||||
prev_attnum = InvalidAttrNumber;
|
||||
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
|
||||
{
|
||||
ItemId iid = PageGetItemIdCareful(rel, stack->blkno, page, i);
|
||||
IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
|
||||
OffsetNumber attnum = gintuple_get_attrnum(&state, idxtuple);
|
||||
GinNullCategory prev_key_category;
|
||||
Datum prev_key;
|
||||
GinNullCategory current_key_category;
|
||||
Datum current_key;
|
||||
|
||||
if (MAXALIGN(ItemIdGetLength(iid)) != MAXALIGN(IndexTupleSize(idxtuple)))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INDEX_CORRUPTED),
|
||||
errmsg("index \"%s\" has inconsistent tuple sizes, block %u, offset %u",
|
||||
RelationGetRelationName(rel), stack->blkno, i)));
|
||||
|
||||
current_key = gintuple_get_key(&state, idxtuple, ¤t_key_category);
|
||||
|
||||
/*
|
||||
* First block is metadata, skip order check. Also, never check
|
||||
* for high key on rightmost page, as this key is not really
|
||||
* stored explicitly.
|
||||
*
|
||||
* Also make sure to not compare entries for different attnums, which
|
||||
* may be stored on the same page.
|
||||
*/
|
||||
if (i != FirstOffsetNumber && attnum == prev_attnum && stack->blkno != GIN_ROOT_BLKNO &&
|
||||
!(i == maxoff && rightlink == InvalidBlockNumber))
|
||||
{
|
||||
prev_key = gintuple_get_key(&state, prev_tuple, &prev_key_category);
|
||||
if (ginCompareEntries(&state, attnum, prev_key,
|
||||
prev_key_category, current_key,
|
||||
current_key_category) >= 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INDEX_CORRUPTED),
|
||||
errmsg("index \"%s\" has wrong tuple order on entry tree page, block %u, offset %u, rightlink %u",
|
||||
RelationGetRelationName(rel), stack->blkno, i, rightlink)));
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if this tuple is consistent with the downlink in the
|
||||
* parent.
|
||||
*/
|
||||
if (stack->parenttup &&
|
||||
i == maxoff)
|
||||
{
|
||||
GinNullCategory parent_key_category;
|
||||
Datum parent_key = gintuple_get_key(&state,
|
||||
stack->parenttup,
|
||||
&parent_key_category);
|
||||
|
||||
if (ginCompareEntries(&state, attnum, current_key,
|
||||
current_key_category, parent_key,
|
||||
parent_key_category) > 0)
|
||||
{
|
||||
/*
|
||||
* There was a discrepancy between parent and child
|
||||
* tuples. We need to verify it is not a result of
|
||||
* concurrent call of gistplacetopage(). So, lock parent
|
||||
* and try to find downlink for current page. It may be
|
||||
* missing due to concurrent page split, this is OK.
|
||||
*/
|
||||
pfree(stack->parenttup);
|
||||
stack->parenttup = gin_refind_parent(rel, stack->parentblk,
|
||||
stack->blkno, strategy);
|
||||
|
||||
/* We found it - make a final check before failing */
|
||||
if (!stack->parenttup)
|
||||
elog(NOTICE, "Unable to find parent tuple for block %u on block %u due to concurrent split",
|
||||
stack->blkno, stack->parentblk);
|
||||
else
|
||||
{
|
||||
parent_key = gintuple_get_key(&state,
|
||||
stack->parenttup,
|
||||
&parent_key_category);
|
||||
|
||||
/*
|
||||
* Check if it is properly adjusted. If succeed,
|
||||
* procced to the next key.
|
||||
*/
|
||||
if (ginCompareEntries(&state, attnum, current_key,
|
||||
current_key_category, parent_key,
|
||||
parent_key_category) > 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INDEX_CORRUPTED),
|
||||
errmsg("index \"%s\" has inconsistent records on page %u offset %u",
|
||||
RelationGetRelationName(rel), stack->blkno, i)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* If this is an internal page, recurse into the child */
|
||||
if (!GinPageIsLeaf(page))
|
||||
{
|
||||
GinScanItem *ptr;
|
||||
|
||||
ptr = (GinScanItem *) palloc(sizeof(GinScanItem));
|
||||
ptr->depth = stack->depth + 1;
|
||||
/* last tuple in layer has no high key */
|
||||
if (i != maxoff && !GinPageGetOpaque(page)->rightlink)
|
||||
ptr->parenttup = CopyIndexTuple(idxtuple);
|
||||
else
|
||||
ptr->parenttup = NULL;
|
||||
ptr->parentblk = stack->blkno;
|
||||
ptr->blkno = GinGetDownlink(idxtuple);
|
||||
ptr->parentlsn = lsn;
|
||||
ptr->next = stack->next;
|
||||
stack->next = ptr;
|
||||
}
|
||||
/* If this item is a pointer to a posting tree, recurse into it */
|
||||
else if (GinIsPostingTree(idxtuple))
|
||||
{
|
||||
BlockNumber rootPostingTree = GinGetPostingTree(idxtuple);
|
||||
|
||||
gin_check_posting_tree_parent_keys_consistency(rel, rootPostingTree);
|
||||
}
|
||||
else
|
||||
{
|
||||
ItemPointer ipd;
|
||||
int nipd;
|
||||
|
||||
ipd = ginReadTupleWithoutState(idxtuple, &nipd);
|
||||
|
||||
for (int j = 0; j < nipd; j++)
|
||||
{
|
||||
if (!OffsetNumberIsValid(ItemPointerGetOffsetNumber(&ipd[j])))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INDEX_CORRUPTED),
|
||||
errmsg("index \"%s\": posting list contains invalid heap pointer on block %u",
|
||||
RelationGetRelationName(rel), stack->blkno)));
|
||||
}
|
||||
pfree(ipd);
|
||||
}
|
||||
|
||||
prev_tuple = CopyIndexTuple(idxtuple);
|
||||
prev_attnum = attnum;
|
||||
}
|
||||
|
||||
LockBuffer(buffer, GIN_UNLOCK);
|
||||
ReleaseBuffer(buffer);
|
||||
|
||||
/* Step to next item in the queue */
|
||||
stack_next = stack->next;
|
||||
if (stack->parenttup)
|
||||
pfree(stack->parenttup);
|
||||
pfree(stack);
|
||||
stack = stack_next;
|
||||
}
|
||||
|
||||
MemoryContextSwitchTo(oldcontext);
|
||||
MemoryContextDelete(mctx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Verify that a freshly-read page looks sane.
|
||||
*/
|
||||
static void
|
||||
check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo)
|
||||
{
|
||||
Page page = BufferGetPage(buffer);
|
||||
|
||||
/*
|
||||
* ReadBuffer verifies that every newly-read page passes
|
||||
* PageHeaderIsValid, which means it either contains a reasonably sane
|
||||
* page header or is all-zero. We have to defend against the all-zero
|
||||
* case, however.
|
||||
*/
|
||||
if (PageIsNew(page))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INDEX_CORRUPTED),
|
||||
errmsg("index \"%s\" contains unexpected zero page at block %u",
|
||||
RelationGetRelationName(rel),
|
||||
BufferGetBlockNumber(buffer)),
|
||||
errhint("Please REINDEX it.")));
|
||||
|
||||
/*
|
||||
* Additionally check that the special area looks sane.
|
||||
*/
|
||||
if (PageGetSpecialSize(page) != MAXALIGN(sizeof(GinPageOpaqueData)))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INDEX_CORRUPTED),
|
||||
errmsg("index \"%s\" contains corrupted page at block %u",
|
||||
RelationGetRelationName(rel),
|
||||
BufferGetBlockNumber(buffer)),
|
||||
errhint("Please REINDEX it.")));
|
||||
|
||||
if (GinPageIsDeleted(page))
|
||||
{
|
||||
if (!GinPageIsLeaf(page))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INDEX_CORRUPTED),
|
||||
errmsg("index \"%s\" has deleted internal page %d",
|
||||
RelationGetRelationName(rel), blockNo)));
|
||||
if (PageGetMaxOffsetNumber(page) > InvalidOffsetNumber)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INDEX_CORRUPTED),
|
||||
errmsg("index \"%s\" has deleted page %d with tuples",
|
||||
RelationGetRelationName(rel), blockNo)));
|
||||
}
|
||||
else if (PageGetMaxOffsetNumber(page) > MaxIndexTuplesPerPage)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INDEX_CORRUPTED),
|
||||
errmsg("index \"%s\" has page %d with exceeding count of tuples",
|
||||
RelationGetRelationName(rel), blockNo)));
|
||||
}
|
||||
|
||||
/*
|
||||
* Try to re-find downlink pointing to 'blkno', in 'parentblkno'.
|
||||
*
|
||||
* If found, returns a palloc'd copy of the downlink tuple. Otherwise,
|
||||
* returns NULL.
|
||||
*/
|
||||
static IndexTuple
|
||||
gin_refind_parent(Relation rel, BlockNumber parentblkno,
|
||||
BlockNumber childblkno, BufferAccessStrategy strategy)
|
||||
{
|
||||
Buffer parentbuf;
|
||||
Page parentpage;
|
||||
OffsetNumber o,
|
||||
parent_maxoff;
|
||||
IndexTuple result = NULL;
|
||||
|
||||
parentbuf = ReadBufferExtended(rel, MAIN_FORKNUM, parentblkno, RBM_NORMAL,
|
||||
strategy);
|
||||
|
||||
LockBuffer(parentbuf, GIN_SHARE);
|
||||
parentpage = BufferGetPage(parentbuf);
|
||||
|
||||
if (GinPageIsLeaf(parentpage))
|
||||
{
|
||||
UnlockReleaseBuffer(parentbuf);
|
||||
return result;
|
||||
}
|
||||
|
||||
parent_maxoff = PageGetMaxOffsetNumber(parentpage);
|
||||
for (o = FirstOffsetNumber; o <= parent_maxoff; o = OffsetNumberNext(o))
|
||||
{
|
||||
ItemId p_iid = PageGetItemIdCareful(rel, parentblkno, parentpage, o);
|
||||
IndexTuple itup = (IndexTuple) PageGetItem(parentpage, p_iid);
|
||||
|
||||
if (ItemPointerGetBlockNumber(&(itup->t_tid)) == childblkno)
|
||||
{
|
||||
/* Found it! Make copy and return it */
|
||||
result = CopyIndexTuple(itup);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
UnlockReleaseBuffer(parentbuf);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static ItemId
|
||||
PageGetItemIdCareful(Relation rel, BlockNumber block, Page page,
|
||||
OffsetNumber offset)
|
||||
{
|
||||
ItemId itemid = PageGetItemId(page, offset);
|
||||
|
||||
if (ItemIdGetOffset(itemid) + ItemIdGetLength(itemid) >
|
||||
BLCKSZ - MAXALIGN(sizeof(GinPageOpaqueData)))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INDEX_CORRUPTED),
|
||||
errmsg("line pointer points past end of tuple space in index \"%s\"",
|
||||
RelationGetRelationName(rel)),
|
||||
errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
|
||||
block, offset, ItemIdGetOffset(itemid),
|
||||
ItemIdGetLength(itemid),
|
||||
ItemIdGetFlags(itemid))));
|
||||
|
||||
/*
|
||||
* Verify that line pointer isn't LP_REDIRECT or LP_UNUSED or LP_DEAD,
|
||||
* since GIN never uses all three. Verify that line pointer has storage,
|
||||
* too.
|
||||
*/
|
||||
if (ItemIdIsRedirected(itemid) || !ItemIdIsUsed(itemid) ||
|
||||
ItemIdIsDead(itemid) || ItemIdGetLength(itemid) == 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INDEX_CORRUPTED),
|
||||
errmsg("invalid line pointer storage in index \"%s\"",
|
||||
RelationGetRelationName(rel)),
|
||||
errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
|
||||
block, offset, ItemIdGetOffset(itemid),
|
||||
ItemIdGetLength(itemid),
|
||||
ItemIdGetFlags(itemid))));
|
||||
|
||||
return itemid;
|
||||
}
|
@ -188,6 +188,26 @@ ORDER BY c.relpages DESC LIMIT 10;
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term>
|
||||
<function>gin_index_check(index regclass) returns void</function>
|
||||
<indexterm>
|
||||
<primary>gin_index_check</primary>
|
||||
</indexterm>
|
||||
</term>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<function>gin_index_check</function> tests that its target GIN index
|
||||
has consistent parent-child tuples relations (no parent tuples
|
||||
require tuple adjustement) and page graph respects balanced-tree
|
||||
invariants (internal pages reference only leaf page or only internal
|
||||
pages).
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
<tip>
|
||||
<para>
|
||||
|
@ -1053,8 +1053,10 @@ GinPageOpaque
|
||||
GinPageOpaqueData
|
||||
GinPlaceToPageRC
|
||||
GinPostingList
|
||||
GinPostingTreeScanItem
|
||||
GinQualCounts
|
||||
GinScanEntry
|
||||
GinScanItem
|
||||
GinScanKey
|
||||
GinScanOpaque
|
||||
GinScanOpaqueData
|
||||
|
Loading…
x
Reference in New Issue
Block a user