mirror of
https://github.com/postgres/postgres.git
synced 2025-05-14 00:03:46 -04:00
amcheck: Distinguish interrupted page deletion from corruption.
This prevents false-positive reports about "the first child of leftmost target page is not leftmost of its level", "block %u is not leftmost" and "left link/right link pair". They appeared if amcheck ran before VACUUM cleaned things, after a cluster exited recovery between the first-stage and second-stage WAL records of a deletion. Back-patch to v11 (all supported versions). Reviewed by Peter Geoghegan. Discussion: https://postgr.es/m/20231005025232.c7.nmisch@google.com
This commit is contained in:
parent
c804ffb562
commit
e04509f324
@ -127,6 +127,9 @@ static void bt_check_every_level(Relation rel, Relation heaprel,
|
|||||||
bool readonly, bool heapallindexed);
|
bool readonly, bool heapallindexed);
|
||||||
static BtreeLevel bt_check_level_from_leftmost(BtreeCheckState *state,
|
static BtreeLevel bt_check_level_from_leftmost(BtreeCheckState *state,
|
||||||
BtreeLevel level);
|
BtreeLevel level);
|
||||||
|
static bool bt_leftmost_ignoring_half_dead(BtreeCheckState *state,
|
||||||
|
BlockNumber start,
|
||||||
|
BTPageOpaque start_opaque);
|
||||||
static void bt_target_page_check(BtreeCheckState *state);
|
static void bt_target_page_check(BtreeCheckState *state);
|
||||||
static ScanKey bt_right_page_check_scankey(BtreeCheckState *state);
|
static ScanKey bt_right_page_check_scankey(BtreeCheckState *state);
|
||||||
static void bt_downlink_check(BtreeCheckState *state, BlockNumber childblock,
|
static void bt_downlink_check(BtreeCheckState *state, BlockNumber childblock,
|
||||||
@ -716,7 +719,7 @@ bt_check_level_from_leftmost(BtreeCheckState *state, BtreeLevel level)
|
|||||||
*/
|
*/
|
||||||
if (state->readonly)
|
if (state->readonly)
|
||||||
{
|
{
|
||||||
if (!P_LEFTMOST(opaque))
|
if (!bt_leftmost_ignoring_half_dead(state, current, opaque))
|
||||||
ereport(ERROR,
|
ereport(ERROR,
|
||||||
(errcode(ERRCODE_INDEX_CORRUPTED),
|
(errcode(ERRCODE_INDEX_CORRUPTED),
|
||||||
errmsg("block %u is not leftmost in index \"%s\"",
|
errmsg("block %u is not leftmost in index \"%s\"",
|
||||||
@ -769,10 +772,14 @@ bt_check_level_from_leftmost(BtreeCheckState *state, BtreeLevel level)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* readonly mode can only ever land on live pages and half-dead pages,
|
* Sibling links should be in mutual agreement. There arises
|
||||||
* so sibling pointers should always be in mutual agreement
|
* leftcurrent == P_NONE && btpo_prev != P_NONE when the left sibling
|
||||||
|
* of the parent's low-key downlink is half-dead. (A half-dead page
|
||||||
|
* has no downlink from its parent.) Under heavyweight locking, the
|
||||||
|
* last bt_leftmost_ignoring_half_dead() validated this btpo_prev.
|
||||||
*/
|
*/
|
||||||
if (state->readonly && opaque->btpo_prev != leftcurrent)
|
if (state->readonly &&
|
||||||
|
opaque->btpo_prev != leftcurrent && leftcurrent != P_NONE)
|
||||||
ereport(ERROR,
|
ereport(ERROR,
|
||||||
(errcode(ERRCODE_INDEX_CORRUPTED),
|
(errcode(ERRCODE_INDEX_CORRUPTED),
|
||||||
errmsg("left link/right link pair in index \"%s\" not in agreement",
|
errmsg("left link/right link pair in index \"%s\" not in agreement",
|
||||||
@ -822,6 +829,67 @@ nextpage:
|
|||||||
return nextleveldown;
|
return nextleveldown;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Like P_LEFTMOST(start_opaque), but accept an arbitrarily-long chain of
|
||||||
|
* half-dead, sibling-linked pages to the left. If a half-dead page appears
|
||||||
|
* under state->readonly, the database exited recovery between the first-stage
|
||||||
|
* and second-stage WAL records of a deletion.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
bt_leftmost_ignoring_half_dead(BtreeCheckState *state,
|
||||||
|
BlockNumber start,
|
||||||
|
BTPageOpaque start_opaque)
|
||||||
|
{
|
||||||
|
BlockNumber reached = start_opaque->btpo_prev,
|
||||||
|
reached_from = start;
|
||||||
|
bool all_half_dead = true;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* To handle the !readonly case, we'd need to accept BTP_DELETED pages and
|
||||||
|
* potentially observe nbtree/README "Page deletion and backwards scans".
|
||||||
|
*/
|
||||||
|
Assert(state->readonly);
|
||||||
|
|
||||||
|
while (reached != P_NONE && all_half_dead)
|
||||||
|
{
|
||||||
|
Page page = palloc_btree_page(state, reached);
|
||||||
|
BTPageOpaque reached_opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||||
|
|
||||||
|
CHECK_FOR_INTERRUPTS();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Try to detect btpo_prev circular links. _bt_unlink_halfdead_page()
|
||||||
|
* writes that side-links will continue to point to the siblings.
|
||||||
|
* Check btpo_next for that property.
|
||||||
|
*/
|
||||||
|
all_half_dead = P_ISHALFDEAD(reached_opaque) &&
|
||||||
|
reached != start &&
|
||||||
|
reached != reached_from &&
|
||||||
|
reached_opaque->btpo_next == reached_from;
|
||||||
|
if (all_half_dead)
|
||||||
|
{
|
||||||
|
XLogRecPtr pagelsn = PageGetLSN(page);
|
||||||
|
|
||||||
|
/* pagelsn should point to an XLOG_BTREE_MARK_PAGE_HALFDEAD */
|
||||||
|
ereport(DEBUG1,
|
||||||
|
(errcode(ERRCODE_NO_DATA),
|
||||||
|
errmsg_internal("harmless interrupted page deletion detected in index \"%s\"",
|
||||||
|
RelationGetRelationName(state->rel)),
|
||||||
|
errdetail_internal("Block=%u right block=%u page lsn=%X/%X.",
|
||||||
|
reached, reached_from,
|
||||||
|
(uint32) (pagelsn >> 32),
|
||||||
|
(uint32) pagelsn)));
|
||||||
|
|
||||||
|
reached_from = reached;
|
||||||
|
reached = reached_opaque->btpo_prev;
|
||||||
|
}
|
||||||
|
|
||||||
|
pfree(page);
|
||||||
|
}
|
||||||
|
|
||||||
|
return all_half_dead;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Function performs the following checks on target page, or pages ancillary to
|
* Function performs the following checks on target page, or pages ancillary to
|
||||||
* target page:
|
* target page:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user