amcheck: Move common routines into a separate module

Before performing checks on an index, we need to take some safety
measures that apply to all index AMs. This includes:

* verifying that the index can be checked - Only selected AMs are
supported by amcheck (right now only B-Tree). The index has to be
valid and not a temporary index from another session.

* changing (and then restoring) user's security context

* obtaining proper locks on the index (and table, if needed)

* discarding GUC changes from the index functions

Until now this was implemented in the B-Tree amcheck module, but it's
something every AM will have to do. So relocate the code into a new
module verify_common for reuse.

The shared steps are implemented by amcheck_lock_relation_and_check(),
receiving the AM-specific verification as a callback. Custom parameters
may be supplied using a pointer.

Author: Andrey Borodin <amborodin@acm.org>
Reviewed-By: José Villanova <jose.arthur@gmail.com>
Reviewed-By: Aleksander Alekseev <aleksander@timescale.com>
Reviewed-By: Nikolay Samokhvalov <samokhvalov@gmail.com>
Reviewed-By: Andres Freund <andres@anarazel.de>
Reviewed-By: Tomas Vondra <tomas@vondra.me>
Reviewed-By: Mark Dilger <mark.dilger@enterprisedb.com>
Reviewed-By: Peter Geoghegan <pg@bowt.ie>
Reviewed-By: Kirill Reshke <reshkekirill@gmail.com>
Discussion: https://postgr.es/m/45AC9B0A-2B45-40EE-B08F-BDCF5739D1E1%40yandex-team.ru
This commit is contained in:
Tomas Vondra 2025-03-29 15:14:47 +01:00
parent fb9dff7663
commit d70b17636d
7 changed files with 297 additions and 199 deletions

View File

@ -3,6 +3,7 @@
MODULE_big = amcheck MODULE_big = amcheck
OBJS = \ OBJS = \
$(WIN32RES) \ $(WIN32RES) \
verify_common.o \
verify_heapam.o \ verify_heapam.o \
verify_nbtree.o verify_nbtree.o

View File

@ -57,8 +57,8 @@ ERROR: could not open relation with OID 17
BEGIN; BEGIN;
CREATE INDEX bttest_a_brin_idx ON bttest_a USING brin(id); CREATE INDEX bttest_a_brin_idx ON bttest_a USING brin(id);
SELECT bt_index_parent_check('bttest_a_brin_idx'); SELECT bt_index_parent_check('bttest_a_brin_idx');
ERROR: only B-Tree indexes are supported as targets for verification ERROR: expected "btree" index as targets for verification
DETAIL: Relation "bttest_a_brin_idx" is not a B-Tree index. DETAIL: Relation "bttest_a_brin_idx" is a brin index.
ROLLBACK; ROLLBACK;
-- normal check outside of xact -- normal check outside of xact
SELECT bt_index_check('bttest_a_idx'); SELECT bt_index_check('bttest_a_idx');

View File

@ -1,6 +1,7 @@
# Copyright (c) 2022-2025, PostgreSQL Global Development Group # Copyright (c) 2022-2025, PostgreSQL Global Development Group
amcheck_sources = files( amcheck_sources = files(
'verify_common.c',
'verify_heapam.c', 'verify_heapam.c',
'verify_nbtree.c', 'verify_nbtree.c',
) )

View File

@ -0,0 +1,191 @@
/*-------------------------------------------------------------------------
*
* verify_common.c
* Utility functions common to all access methods.
*
* Copyright (c) 2016-2025, PostgreSQL Global Development Group
*
* IDENTIFICATION
* contrib/amcheck/verify_common.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/genam.h"
#include "access/table.h"
#include "access/tableam.h"
#include "verify_common.h"
#include "catalog/index.h"
#include "catalog/pg_am.h"
#include "commands/tablecmds.h"
#include "utils/guc.h"
#include "utils/syscache.h"
static bool amcheck_index_mainfork_expected(Relation rel);
/*
* Check if index relation should have a file for its main relation fork.
* Verification uses this to skip unlogged indexes when in hot standby mode,
* where there is simply nothing to verify.
*
* NB: Caller should call index_checkable() before calling here.
*/
static bool
amcheck_index_mainfork_expected(Relation rel)
{
if (rel->rd_rel->relpersistence != RELPERSISTENCE_UNLOGGED ||
!RecoveryInProgress())
return true;
ereport(NOTICE,
(errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
errmsg("cannot verify unlogged index \"%s\" during recovery, skipping",
RelationGetRelationName(rel))));
return false;
}
/*
* Amcheck main workhorse.
* Given index relation OID, lock relation.
* Next, take a number of standard actions:
* 1) Make sure the index can be checked
* 2) change the context of the user,
* 3) keep track of GUCs modified via index functions
* 4) execute callback function to verify integrity.
*/
void
amcheck_lock_relation_and_check(Oid indrelid,
Oid am_id,
IndexDoCheckCallback check,
LOCKMODE lockmode,
void *state)
{
Oid heapid;
Relation indrel;
Relation heaprel;
Oid save_userid;
int save_sec_context;
int save_nestlevel;
/*
* We must lock table before index to avoid deadlocks. However, if the
* passed indrelid isn't an index then IndexGetRelation() will fail.
* Rather than emitting a not-very-helpful error message, postpone
* complaining, expecting that the is-it-an-index test below will fail.
*
* In hot standby mode this will raise an error when parentcheck is true.
*/
heapid = IndexGetRelation(indrelid, true);
if (OidIsValid(heapid))
{
heaprel = table_open(heapid, lockmode);
/*
* Switch to the table owner's userid, so that any index functions are
* run as that user. Also lock down security-restricted operations
* and arrange to make GUC variable changes local to this command.
*/
GetUserIdAndSecContext(&save_userid, &save_sec_context);
SetUserIdAndSecContext(heaprel->rd_rel->relowner,
save_sec_context | SECURITY_RESTRICTED_OPERATION);
save_nestlevel = NewGUCNestLevel();
}
else
{
heaprel = NULL;
/* Set these just to suppress "uninitialized variable" warnings */
save_userid = InvalidOid;
save_sec_context = -1;
save_nestlevel = -1;
}
/*
* Open the target index relations separately (like relation_openrv(), but
* with heap relation locked first to prevent deadlocking). In hot
* standby mode this will raise an error when parentcheck is true.
*
* There is no need for the usual indcheckxmin usability horizon test
* here, even in the heapallindexed case, because index undergoing
* verification only needs to have entries for a new transaction snapshot.
* (If this is a parentcheck verification, there is no question about
* committed or recently dead heap tuples lacking index entries due to
* concurrent activity.)
*/
indrel = index_open(indrelid, lockmode);
/*
* Since we did the IndexGetRelation call above without any lock, it's
* barely possible that a race against an index drop/recreation could have
* netted us the wrong table.
*/
if (heaprel == NULL || heapid != IndexGetRelation(indrelid, false))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_TABLE),
errmsg("could not open parent table of index \"%s\"",
RelationGetRelationName(indrel))));
/* Check that relation suitable for checking */
if (index_checkable(indrel, am_id))
check(indrel, heaprel, state, lockmode == ShareLock);
/* Roll back any GUC changes executed by index functions */
AtEOXact_GUC(false, save_nestlevel);
/* Restore userid and security context */
SetUserIdAndSecContext(save_userid, save_sec_context);
/*
* Release locks early. That's ok here because nothing in the called
* routines will trigger shared cache invalidations to be sent, so we can
* relax the usual pattern of only releasing locks after commit.
*/
index_close(indrel, lockmode);
if (heaprel)
table_close(heaprel, lockmode);
}
/*
* Basic checks about the suitability of a relation for checking as an index.
*
*
* NB: Intentionally not checking permissions, the function is normally not
* callable by non-superusers. If granted, it's useful to be able to check a
* whole cluster.
*/
bool
index_checkable(Relation rel, Oid am_id)
{
if (rel->rd_rel->relkind != RELKIND_INDEX ||
rel->rd_rel->relam != am_id)
{
HeapTuple amtup;
HeapTuple amtuprel;
amtup = SearchSysCache1(AMOID, ObjectIdGetDatum(am_id));
amtuprel = SearchSysCache1(AMOID, ObjectIdGetDatum(rel->rd_rel->relam));
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("expected \"%s\" index as targets for verification", NameStr(((Form_pg_am) GETSTRUCT(amtup))->amname)),
errdetail("Relation \"%s\" is a %s index.",
RelationGetRelationName(rel), NameStr(((Form_pg_am) GETSTRUCT(amtuprel))->amname))));
}
if (RELATION_IS_OTHER_TEMP(rel))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot access temporary tables of other sessions"),
errdetail("Index \"%s\" is associated with temporary relation.",
RelationGetRelationName(rel))));
if (!rel->rd_index->indisvalid)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot check index \"%s\"",
RelationGetRelationName(rel)),
errdetail("Index is not valid.")));
return amcheck_index_mainfork_expected(rel);
}

View File

@ -0,0 +1,31 @@
/*-------------------------------------------------------------------------
*
* amcheck.h
* Shared routines for amcheck verifications.
*
* Copyright (c) 2016-2025, PostgreSQL Global Development Group
*
* IDENTIFICATION
* contrib/amcheck/amcheck.h
*
*-------------------------------------------------------------------------
*/
#include "storage/bufpage.h"
#include "storage/lmgr.h"
#include "storage/lockdefs.h"
#include "utils/relcache.h"
#include "miscadmin.h"
/* Typedefs for callback functions for amcheck_lock_relation */
typedef void (*IndexCheckableCallback) (Relation index);
typedef void (*IndexDoCheckCallback) (Relation rel,
Relation heaprel,
void *state,
bool readonly);
extern void amcheck_lock_relation_and_check(Oid indrelid,
Oid am_id,
IndexDoCheckCallback check,
LOCKMODE lockmode, void *state);
extern bool index_checkable(Relation rel, Oid am_id);

View File

@ -30,6 +30,7 @@
#include "access/tableam.h" #include "access/tableam.h"
#include "access/transam.h" #include "access/transam.h"
#include "access/xact.h" #include "access/xact.h"
#include "verify_common.h"
#include "catalog/index.h" #include "catalog/index.h"
#include "catalog/pg_am.h" #include "catalog/pg_am.h"
#include "catalog/pg_opfamily_d.h" #include "catalog/pg_opfamily_d.h"
@ -159,14 +160,22 @@ typedef struct BtreeLastVisibleEntry
ItemPointer tid; /* Heap tid */ ItemPointer tid; /* Heap tid */
} BtreeLastVisibleEntry; } BtreeLastVisibleEntry;
/*
* arguments for the bt_index_check_callback callback
*/
typedef struct BTCallbackState
{
bool parentcheck;
bool heapallindexed;
bool rootdescend;
bool checkunique;
} BTCallbackState;
PG_FUNCTION_INFO_V1(bt_index_check); PG_FUNCTION_INFO_V1(bt_index_check);
PG_FUNCTION_INFO_V1(bt_index_parent_check); PG_FUNCTION_INFO_V1(bt_index_parent_check);
static void bt_index_check_internal(Oid indrelid, bool parentcheck, static void bt_index_check_callback(Relation indrel, Relation heaprel,
bool heapallindexed, bool rootdescend, void *state, bool readonly);
bool checkunique);
static inline void btree_index_checkable(Relation rel);
static inline bool btree_index_mainfork_expected(Relation rel);
static void bt_check_every_level(Relation rel, Relation heaprel, static void bt_check_every_level(Relation rel, Relation heaprel,
bool heapkeyspace, bool readonly, bool heapallindexed, bool heapkeyspace, bool readonly, bool heapallindexed,
bool rootdescend, bool checkunique); bool rootdescend, bool checkunique);
@ -241,15 +250,21 @@ Datum
bt_index_check(PG_FUNCTION_ARGS) bt_index_check(PG_FUNCTION_ARGS)
{ {
Oid indrelid = PG_GETARG_OID(0); Oid indrelid = PG_GETARG_OID(0);
bool heapallindexed = false; BTCallbackState args;
bool checkunique = false;
args.heapallindexed = false;
args.rootdescend = false;
args.parentcheck = false;
args.checkunique = false;
if (PG_NARGS() >= 2) if (PG_NARGS() >= 2)
heapallindexed = PG_GETARG_BOOL(1); args.heapallindexed = PG_GETARG_BOOL(1);
if (PG_NARGS() == 3) if (PG_NARGS() >= 3)
checkunique = PG_GETARG_BOOL(2); args.checkunique = PG_GETARG_BOOL(2);
bt_index_check_internal(indrelid, false, heapallindexed, false, checkunique); amcheck_lock_relation_and_check(indrelid, BTREE_AM_OID,
bt_index_check_callback,
AccessShareLock, &args);
PG_RETURN_VOID(); PG_RETURN_VOID();
} }
@ -267,18 +282,23 @@ Datum
bt_index_parent_check(PG_FUNCTION_ARGS) bt_index_parent_check(PG_FUNCTION_ARGS)
{ {
Oid indrelid = PG_GETARG_OID(0); Oid indrelid = PG_GETARG_OID(0);
bool heapallindexed = false; BTCallbackState args;
bool rootdescend = false;
bool checkunique = false; args.heapallindexed = false;
args.rootdescend = false;
args.parentcheck = true;
args.checkunique = false;
if (PG_NARGS() >= 2) if (PG_NARGS() >= 2)
heapallindexed = PG_GETARG_BOOL(1); args.heapallindexed = PG_GETARG_BOOL(1);
if (PG_NARGS() >= 3) if (PG_NARGS() >= 3)
rootdescend = PG_GETARG_BOOL(2); args.rootdescend = PG_GETARG_BOOL(2);
if (PG_NARGS() == 4) if (PG_NARGS() >= 4)
checkunique = PG_GETARG_BOOL(3); args.checkunique = PG_GETARG_BOOL(3);
bt_index_check_internal(indrelid, true, heapallindexed, rootdescend, checkunique); amcheck_lock_relation_and_check(indrelid, BTREE_AM_OID,
bt_index_check_callback,
ShareLock, &args);
PG_RETURN_VOID(); PG_RETURN_VOID();
} }
@ -287,193 +307,46 @@ bt_index_parent_check(PG_FUNCTION_ARGS)
* Helper for bt_index_[parent_]check, coordinating the bulk of the work. * Helper for bt_index_[parent_]check, coordinating the bulk of the work.
*/ */
static void static void
bt_index_check_internal(Oid indrelid, bool parentcheck, bool heapallindexed, bt_index_check_callback(Relation indrel, Relation heaprel, void *state, bool readonly)
bool rootdescend, bool checkunique)
{ {
Oid heapid; BTCallbackState *args = (BTCallbackState *) state;
Relation indrel; bool heapkeyspace,
Relation heaprel; allequalimage;
LOCKMODE lockmode;
Oid save_userid;
int save_sec_context;
int save_nestlevel;
if (parentcheck) if (!smgrexists(RelationGetSmgr(indrel), MAIN_FORKNUM))
lockmode = ShareLock;
else
lockmode = AccessShareLock;
/*
* We must lock table before index to avoid deadlocks. However, if the
* passed indrelid isn't an index then IndexGetRelation() will fail.
* Rather than emitting a not-very-helpful error message, postpone
* complaining, expecting that the is-it-an-index test below will fail.
*
* In hot standby mode this will raise an error when parentcheck is true.
*/
heapid = IndexGetRelation(indrelid, true);
if (OidIsValid(heapid))
{
heaprel = table_open(heapid, lockmode);
/*
* Switch to the table owner's userid, so that any index functions are
* run as that user. Also lock down security-restricted operations
* and arrange to make GUC variable changes local to this command.
*/
GetUserIdAndSecContext(&save_userid, &save_sec_context);
SetUserIdAndSecContext(heaprel->rd_rel->relowner,
save_sec_context | SECURITY_RESTRICTED_OPERATION);
save_nestlevel = NewGUCNestLevel();
RestrictSearchPath();
}
else
{
heaprel = NULL;
/* Set these just to suppress "uninitialized variable" warnings */
save_userid = InvalidOid;
save_sec_context = -1;
save_nestlevel = -1;
}
/*
* Open the target index relations separately (like relation_openrv(), but
* with heap relation locked first to prevent deadlocking). In hot
* standby mode this will raise an error when parentcheck is true.
*
* There is no need for the usual indcheckxmin usability horizon test
* here, even in the heapallindexed case, because index undergoing
* verification only needs to have entries for a new transaction snapshot.
* (If this is a parentcheck verification, there is no question about
* committed or recently dead heap tuples lacking index entries due to
* concurrent activity.)
*/
indrel = index_open(indrelid, lockmode);
/*
* Since we did the IndexGetRelation call above without any lock, it's
* barely possible that a race against an index drop/recreation could have
* netted us the wrong table.
*/
if (heaprel == NULL || heapid != IndexGetRelation(indrelid, false))
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_TABLE), (errcode(ERRCODE_INDEX_CORRUPTED),
errmsg("could not open parent table of index \"%s\"", errmsg("index \"%s\" lacks a main relation fork",
RelationGetRelationName(indrel)))); RelationGetRelationName(indrel))));
/* Relation suitable for checking as B-Tree? */ /* Extract metadata from metapage, and sanitize it in passing */
btree_index_checkable(indrel); _bt_metaversion(indrel, &heapkeyspace, &allequalimage);
if (allequalimage && !heapkeyspace)
if (btree_index_mainfork_expected(indrel)) ereport(ERROR,
(errcode(ERRCODE_INDEX_CORRUPTED),
errmsg("index \"%s\" metapage has equalimage field set on unsupported nbtree version",
RelationGetRelationName(indrel))));
if (allequalimage && !_bt_allequalimage(indrel, false))
{ {
bool heapkeyspace, bool has_interval_ops = false;
allequalimage;
if (!smgrexists(RelationGetSmgr(indrel), MAIN_FORKNUM)) for (int i = 0; i < IndexRelationGetNumberOfKeyAttributes(indrel); i++)
ereport(ERROR, if (indrel->rd_opfamily[i] == INTERVAL_BTREE_FAM_OID)
(errcode(ERRCODE_INDEX_CORRUPTED), {
errmsg("index \"%s\" lacks a main relation fork", has_interval_ops = true;
RelationGetRelationName(indrel)))); ereport(ERROR,
(errcode(ERRCODE_INDEX_CORRUPTED),
/* Extract metadata from metapage, and sanitize it in passing */ errmsg("index \"%s\" metapage incorrectly indicates that deduplication is safe",
_bt_metaversion(indrel, &heapkeyspace, &allequalimage); RelationGetRelationName(indrel)),
if (allequalimage && !heapkeyspace) has_interval_ops
ereport(ERROR, ? errhint("This is known of \"interval\" indexes last built on a version predating 2023-11.")
(errcode(ERRCODE_INDEX_CORRUPTED), : 0));
errmsg("index \"%s\" metapage has equalimage field set on unsupported nbtree version", }
RelationGetRelationName(indrel))));
if (allequalimage && !_bt_allequalimage(indrel, false))
{
bool has_interval_ops = false;
for (int i = 0; i < IndexRelationGetNumberOfKeyAttributes(indrel); i++)
if (indrel->rd_opfamily[i] == INTERVAL_BTREE_FAM_OID)
has_interval_ops = true;
ereport(ERROR,
(errcode(ERRCODE_INDEX_CORRUPTED),
errmsg("index \"%s\" metapage incorrectly indicates that deduplication is safe",
RelationGetRelationName(indrel)),
has_interval_ops
? errhint("This is known of \"interval\" indexes last built on a version predating 2023-11.")
: 0));
}
/* Check index, possibly against table it is an index on */
bt_check_every_level(indrel, heaprel, heapkeyspace, parentcheck,
heapallindexed, rootdescend, checkunique);
} }
/* Roll back any GUC changes executed by index functions */ /* Check index, possibly against table it is an index on */
AtEOXact_GUC(false, save_nestlevel); bt_check_every_level(indrel, heaprel, heapkeyspace, readonly,
args->heapallindexed, args->rootdescend, args->checkunique);
/* Restore userid and security context */
SetUserIdAndSecContext(save_userid, save_sec_context);
/*
* Release locks early. That's ok here because nothing in the called
* routines will trigger shared cache invalidations to be sent, so we can
* relax the usual pattern of only releasing locks after commit.
*/
index_close(indrel, lockmode);
if (heaprel)
table_close(heaprel, lockmode);
}
/*
* Basic checks about the suitability of a relation for checking as a B-Tree
* index.
*
* NB: Intentionally not checking permissions, the function is normally not
* callable by non-superusers. If granted, it's useful to be able to check a
* whole cluster.
*/
static inline void
btree_index_checkable(Relation rel)
{
if (rel->rd_rel->relkind != RELKIND_INDEX ||
rel->rd_rel->relam != BTREE_AM_OID)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("only B-Tree indexes are supported as targets for verification"),
errdetail("Relation \"%s\" is not a B-Tree index.",
RelationGetRelationName(rel))));
if (RELATION_IS_OTHER_TEMP(rel))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot access temporary tables of other sessions"),
errdetail("Index \"%s\" is associated with temporary relation.",
RelationGetRelationName(rel))));
if (!rel->rd_index->indisvalid)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot check index \"%s\"",
RelationGetRelationName(rel)),
errdetail("Index is not valid.")));
}
/*
* Check if B-Tree index relation should have a file for its main relation
* fork. Verification uses this to skip unlogged indexes when in hot standby
* mode, where there is simply nothing to verify. We behave as if the
* relation is empty.
*
* NB: Caller should call btree_index_checkable() before calling here.
*/
static inline bool
btree_index_mainfork_expected(Relation rel)
{
if (rel->rd_rel->relpersistence != RELPERSISTENCE_UNLOGGED ||
!RecoveryInProgress())
return true;
ereport(DEBUG1,
(errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
errmsg("cannot verify unlogged index \"%s\" during recovery, skipping",
RelationGetRelationName(rel))));
return false;
} }
/* /*

View File

@ -194,6 +194,7 @@ BOOLEAN
BOX BOX
BTArrayKeyInfo BTArrayKeyInfo
BTBuildState BTBuildState
BTCallbackState
BTCycleId BTCycleId
BTDedupInterval BTDedupInterval
BTDedupState BTDedupState