diff --git a/contrib/pg_visibility/Makefile b/contrib/pg_visibility/Makefile index b3b1a89e47d..d3cb411cc90 100644 --- a/contrib/pg_visibility/Makefile +++ b/contrib/pg_visibility/Makefile @@ -11,6 +11,7 @@ DATA = pg_visibility--1.1.sql pg_visibility--1.1--1.2.sql \ PGFILEDESC = "pg_visibility - page visibility information" REGRESS = pg_visibility +TAP_TESTS = 1 ifdef USE_PGXS PG_CONFIG = pg_config diff --git a/contrib/pg_visibility/meson.build b/contrib/pg_visibility/meson.build index 38e242d95c0..f932371f62d 100644 --- a/contrib/pg_visibility/meson.build +++ b/contrib/pg_visibility/meson.build @@ -32,5 +32,9 @@ tests += { 'sql': [ 'pg_visibility', ], + 'tap': { + 'tests': [ + 't/001_concurrent_transaction.pl', + ], }, } diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c index 17c50a44722..1a1a4ff7be7 100644 --- a/contrib/pg_visibility/pg_visibility.c +++ b/contrib/pg_visibility/pg_visibility.c @@ -19,6 +19,7 @@ #include "funcapi.h" #include "miscadmin.h" #include "storage/bufmgr.h" +#include "storage/proc.h" #include "storage/procarray.h" #include "storage/smgr.h" #include "utils/rel.h" @@ -532,6 +533,63 @@ collect_visibility_data(Oid relid, bool include_pd) return info; } +/* + * The "strict" version of GetOldestNonRemovableTransactionId(). The + * pg_visibility check can tolerate false positives (don't report some of the + * errors), but can't tolerate false negatives (report false errors). Normally, + * horizons move forwards, but there are cases when it could move backward + * (see comment for ComputeXidHorizons()). + * + * This is why we have to implement our own function for xid horizon, which + * would be guaranteed to be newer or equal to any xid horizon computed before. + * We have to do the following to achieve this. + * + * 1. Ignore processes xmin's, because they consider connection to other + * databases that were ignored before. + * 2. Ignore KnownAssignedXids, because they are not database-aware. At the + * same time, the primary could compute its horizons database-aware. + * 3. Ignore walsender xmin, because it could go backward if some replication + * connections don't use replication slots. + * + * As a result, we're using only currently running xids to compute the horizon. + * Surely these would significantly sacrifice accuracy. But we have to do so + * to avoid reporting false errors. + */ +static TransactionId +GetStrictOldestNonRemovableTransactionId(Relation rel) +{ + RunningTransactions runningTransactions; + + if (rel == NULL || rel->rd_rel->relisshared || RecoveryInProgress()) + { + /* Shared relation: take into account all running xids */ + runningTransactions = GetRunningTransactionData(); + LWLockRelease(ProcArrayLock); + LWLockRelease(XidGenLock); + return runningTransactions->oldestRunningXid; + } + else if (!RELATION_IS_LOCAL(rel)) + { + /* + * Normal relation: take into account xids running within the current + * database + */ + runningTransactions = GetRunningTransactionData(); + LWLockRelease(ProcArrayLock); + LWLockRelease(XidGenLock); + return runningTransactions->oldestDatabaseRunningXid; + } + else + { + /* + * For temporary relations, ComputeXidHorizons() uses only + * TransamVariables->latestCompletedXid and MyProc->xid. These two + * shouldn't go backwards. So we're fine with this horizon. + */ + return GetOldestNonRemovableTransactionId(rel); + } +} + /* * Returns a list of items whose visibility map information does not match * the status of the tuples on the page. @@ -563,7 +621,7 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen) check_relation_relkind(rel); if (all_visible) - OldestXmin = GetOldestNonRemovableTransactionId(rel); + OldestXmin = GetStrictOldestNonRemovableTransactionId(rel); nblocks = RelationGetNumberOfBlocks(rel); @@ -671,11 +729,11 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen) * retake ProcArrayLock here while we're holding the buffer * exclusively locked, but it should be safe against * deadlocks, because surely - * GetOldestNonRemovableTransactionId() should never take a - * buffer lock. And this shouldn't happen often, so it's worth - * being careful so as to avoid false positives. + * GetStrictOldestNonRemovableTransactionId() should never + * take a buffer lock. And this shouldn't happen often, so + * it's worth being careful so as to avoid false positives. */ - RecomputedOldestXmin = GetOldestNonRemovableTransactionId(rel); + RecomputedOldestXmin = GetStrictOldestNonRemovableTransactionId(rel); if (!TransactionIdPrecedes(OldestXmin, RecomputedOldestXmin)) record_corrupt_item(items, &tuple.t_self); diff --git a/contrib/pg_visibility/t/001_concurrent_transaction.pl b/contrib/pg_visibility/t/001_concurrent_transaction.pl new file mode 100644 index 00000000000..c31d041757d --- /dev/null +++ b/contrib/pg_visibility/t/001_concurrent_transaction.pl @@ -0,0 +1,47 @@ + +# Copyright (c) 2021-2024, PostgreSQL Global Development Group + +# Check that a concurrent transaction doesn't cause false negatives in +# pg_check_visible() function +use strict; +use warnings FATAL => 'all'; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + + +my $node = PostgreSQL::Test::Cluster->new('main'); + +$node->init; +$node->start; + +# Setup another database +$node->safe_psql("postgres", "CREATE DATABASE other_database;\n"); +my $bsession = $node->background_psql('other_database'); + +# Run a concurrent transaction +$bsession->query_safe( + qq[ + BEGIN; + SELECT txid_current(); +]); + +# Create a sample table and run vacuum +$node->safe_psql("postgres", + "CREATE EXTENSION pg_visibility;\n" + . "CREATE TABLE vacuum_test AS SELECT 42 i;\n" + . "VACUUM (disable_page_skipping) vacuum_test;"); + +# Run pg_check_visible() +my $result = $node->safe_psql("postgres", + "SELECT * FROM pg_check_visible('vacuum_test');"); + +# There should be no false negatives +ok($result eq "", "pg_check_visible() detects no errors"); + +# Shutdown +$bsession->query_safe("COMMIT;"); +$bsession->quit; +$node->stop; + +done_testing(); diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 9eea1ed315a..b3cd248fb64 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -2688,6 +2688,7 @@ GetRunningTransactionData(void) RunningTransactions CurrentRunningXacts = &CurrentRunningXactsData; TransactionId latestCompletedXid; TransactionId oldestRunningXid; + TransactionId oldestDatabaseRunningXid; TransactionId *xids; int index; int count; @@ -2732,7 +2733,7 @@ GetRunningTransactionData(void) latestCompletedXid = XidFromFullTransactionId(TransamVariables->latestCompletedXid); - oldestRunningXid = + oldestDatabaseRunningXid = oldestRunningXid = XidFromFullTransactionId(TransamVariables->nextXid); /* @@ -2740,6 +2741,8 @@ GetRunningTransactionData(void) */ for (index = 0; index < arrayP->numProcs; index++) { + int pgprocno = arrayP->pgprocnos[index]; + PGPROC *proc = &allProcs[pgprocno]; TransactionId xid; /* Fetch xid just once - see GetNewTransactionId */ @@ -2760,6 +2763,13 @@ GetRunningTransactionData(void) if (TransactionIdPrecedes(xid, oldestRunningXid)) oldestRunningXid = xid; + /* + * Also, update the oldest running xid within the current database. + */ + if (proc->databaseId == MyDatabaseId && + TransactionIdPrecedes(xid, oldestRunningXid)) + oldestDatabaseRunningXid = xid; + if (ProcGlobal->subxidStates[index].overflowed) suboverflowed = true; @@ -2826,6 +2836,7 @@ GetRunningTransactionData(void) CurrentRunningXacts->subxid_overflow = suboverflowed; CurrentRunningXacts->nextXid = XidFromFullTransactionId(TransamVariables->nextXid); CurrentRunningXacts->oldestRunningXid = oldestRunningXid; + CurrentRunningXacts->oldestDatabaseRunningXid = oldestDatabaseRunningXid; CurrentRunningXacts->latestCompletedXid = latestCompletedXid; Assert(TransactionIdIsValid(CurrentRunningXacts->nextXid)); diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h index 534d56fbc9f..0fc0804e266 100644 --- a/src/include/storage/standby.h +++ b/src/include/storage/standby.h @@ -82,6 +82,8 @@ typedef struct RunningTransactionsData bool subxid_overflow; /* snapshot overflowed, subxids missing */ TransactionId nextXid; /* xid from TransamVariables->nextXid */ TransactionId oldestRunningXid; /* *not* oldestXmin */ + TransactionId oldestDatabaseRunningXid; /* same as above, but within the + * current database */ TransactionId latestCompletedXid; /* so we can set xmax */ TransactionId *xids; /* array of (sub)xids still running */