Support RBM_ZERO_AND_CLEANUP_LOCK in ExtendBufferedRelTo(), add tests

For some reason I had not implemented RBM_ZERO_AND_CLEANUP_LOCK support in ExtendBufferedRelTo(), likely thinking it not being reachable. But it is reachable, e.g. when replaying a WAL record for a page in a relation that subsequently is truncated (likely only reachable when doing crash recovery or PITR, not during ongoing streaming replication). As now all of the RBM_* modes are supported, remove assertions checking mode. As we had no test coverage for this scenario, add a new TAP test. There's plenty more that ought to be tested in this area... Reported-by: Tom Lane <tgl@sss.pgh.pa.us> Reported-by: Alexander Lakhin <exclusion@gmail.com> Reviewed-by: Alexander Lakhin <exclusion@gmail.com> Discussion: https://postgr.es/m/392271.1681238924%40sss.pgh.pa.us Discussion: https://postgr.es/m/0b5eb82b-cb99-e0a4-b932-3dc60e2e3926@gmail.com
2025-10-31 00:03:57 -04:00 · 2023-04-14 11:30:20 -07:00 · 2023-04-14 11:30:20 -07:00 · 43a33ef54e
commit 43a33ef54e
parent e4d905f772
3 changed files with 154 additions and 11 deletions
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@ -888,8 +888,6 @@ ExtendBufferedRelTo(ExtendBufferedWhat eb,
 	Assert((eb.rel != NULL) != (eb.smgr != NULL));
 	Assert(eb.smgr == NULL || eb.relpersistence != 0);
 	Assert(extend_to != InvalidBlockNumber && extend_to > 0);
 	Assert(mode == RBM_NORMAL || mode == RBM_ZERO_ON_ERROR ||
 		   mode == RBM_ZERO_AND_LOCK);
 	if (eb.smgr == NULL)
 	{
@ -933,7 +931,13 @@ ExtendBufferedRelTo(ExtendBufferedWhat eb,
 	 */
 	current_size = smgrnblocks(eb.smgr, fork);
-	if (mode == RBM_ZERO_AND_LOCK)
+	/*
 	 * Since no-one else can be looking at the page contents yet, there is no
 	 * difference between an exclusive lock and a cleanup-strength lock. Note
 	 * that we pass the original mode to ReadBuffer_common() below, when
 	 * falling back to reading the buffer to a concurrent relation extension.
 	 */
 	if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
 		flags |= EB_LOCK_TARGET;
 	while (current_size < extend_to)
@ -1008,11 +1012,12 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 	{
 		uint32		flags = EB_SKIP_EXTENSION_LOCK;
-		Assert(mode == RBM_NORMAL ||
+		/*
-			   mode == RBM_ZERO_AND_LOCK ||
+		 * Since no-one else can be looking at the page contents yet, there is
-			   mode == RBM_ZERO_ON_ERROR);
+		 * no difference between an exclusive lock and a cleanup-strength
-
+		 * lock.
-		if (mode == RBM_ZERO_AND_LOCK)
+		 */
 		if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
 			flags |= EB_LOCK_FIRST;
 		return ExtendBufferedRel(EB_SMGR(smgr, relpersistence),
@ -1145,9 +1150,10 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 	}
 	/*
-	 * In RBM_ZERO_AND_LOCK mode, grab the buffer content lock before marking
+	 * In RBM_ZERO_AND_LOCK / RBM_ZERO_AND_CLEANUP_LOCK mode, grab the buffer
-	 * the page as valid, to make sure that no other backend sees the zeroed
+	 * content lock before marking the page as valid, to make sure that no
-	 * page before the caller has had a chance to initialize it.
+	 * other backend sees the zeroed page before the caller has had a chance
 	 * to initialize it.
 	 *
 	 * Since no-one else can be looking at the page contents yet, there is no
 	 * difference between an exclusive lock and a cleanup-strength lock. (Note
--- a/src/test/recovery/meson.build
+++ b/src/test/recovery/meson.build
@ -41,6 +41,7 @@ tests += {
      't/033_replay_tsp_drops.pl',
      't/034_create_database.pl',
      't/035_standby_logical_decoding.pl',
      't/036_truncated_dropped.pl',
    ],
  },
 }
--- a/src/test/recovery/t/036_truncated_dropped.pl
+++ b/src/test/recovery/t/036_truncated_dropped.pl
@ -0,0 +1,136 @@
 # Copyright (c) 2021-2023, PostgreSQL Global Development Group
 # Tests recovery scenarios where the files are shorter than in the common
 # cases, e.g. due to replaying WAL records of a relation that was subsequently
 # truncated or dropped.
 use strict;
 use warnings;
 use PostgreSQL::Test::Cluster;
 use Test::More;
 my $node = PostgreSQL::Test::Cluster->new('n1');
 $node->init();
 # Disable autovacuum to guarantee VACUUM can remove rows / truncate relations
 $node->append_conf(
 	'postgresql.conf', qq[
 wal_level = 'replica'
 autovacuum = off
 ]);
 $node->start();
 # Test: Replay replay of PRUNE records for a pre-existing, then dropped,
 # relation
 $node->safe_psql(
 	'postgres', qq[
 CREATE TABLE truncme(i int) WITH (fillfactor = 50);
 INSERT INTO truncme SELECT generate_series(1, 1000);
 UPDATE truncme SET i = 1;
 CHECKPOINT; -- ensure relation exists at start of recovery
 VACUUM truncme; -- generate prune records
 DROP TABLE truncme;
 ]);
 $node->stop('immediate');
 ok($node->start(),
 	'replay of PRUNE records for a pre-existing, then dropped, relation');
 # Test: Replay of PRUNE records for a newly created, then dropped, relation
 $node->safe_psql(
 	'postgres', qq[
 CREATE TABLE truncme(i int) WITH (fillfactor = 50);
 INSERT INTO truncme SELECT generate_series(1, 1000);
 UPDATE truncme SET i = 1;
 VACUUM truncme; -- generate prune records
 DROP TABLE truncme;
 ]);
 $node->stop('immediate');
 ok($node->start(),
 	'replay of PRUNE records for a newly created, then dropped, relation');
 # Test: Replay of PRUNE records affecting truncated block. With FPIs used for
 # PRUNE.
 $node->safe_psql(
 	'postgres', qq[
 CREATE TABLE truncme(i int) WITH (fillfactor = 50);
 INSERT INTO truncme SELECT generate_series(1, 1000);
 UPDATE truncme SET i = 1;
 CHECKPOINT; -- generate FPIs
 VACUUM truncme; -- generate prune records
 TRUNCATE truncme; -- make blocks non-existing
 INSERT INTO truncme SELECT generate_series(1, 10);
 ]);
 $node->stop('immediate');
 ok($node->start(),
 	'replay of PRUNE records affecting truncated block (FPIs)');
 is($node->safe_psql('postgres', 'select count(*), sum(i) FROM truncme'),
 	'10|55', 'table contents as expected after recovery');
 $node->safe_psql('postgres', 'DROP TABLE truncme');
 # Test replay of PRUNE records for blocks that are later truncated. Without
 # FPIs used for PRUNE.
 $node->safe_psql(
 	'postgres', qq[
 CREATE TABLE truncme(i int) WITH (fillfactor = 50);
 INSERT INTO truncme SELECT generate_series(1, 1000);
 UPDATE truncme SET i = 1;
 VACUUM truncme; -- generate prune records
 TRUNCATE truncme; -- make blocks non-existing
 INSERT INTO truncme SELECT generate_series(1, 10);
 ]);
 $node->stop('immediate');
 ok($node->start(),
 	'replay of PRUNE records affecting truncated block (no FPIs)');
 is($node->safe_psql('postgres', 'select count(*), sum(i) FROM truncme'),
 	'10|55', 'table contents as expected after recovery');
 $node->safe_psql('postgres', 'DROP TABLE truncme');
 # Test: Replay of partial truncation via VACUUM
 $node->safe_psql(
 	'postgres', qq[
 CREATE TABLE truncme(i int) WITH (fillfactor = 50);
 INSERT INTO truncme SELECT generate_series(1, 1000);
 UPDATE truncme SET i = i + 1;
 -- ensure a mix of pre/post truncation rows
 DELETE FROM truncme WHERE i > 500;
 VACUUM truncme; -- should truncate relation
 -- rows at TIDs that previously existed
 INSERT INTO truncme SELECT generate_series(1000, 1010);
 ]);
 $node->stop('immediate');
 ok($node->start(), 'replay of partial truncation via VACUUM');
 is( $node->safe_psql(
 		'postgres', 'select count(*), sum(i), min(i), max(i) FROM truncme'),
 	'510|136304|2|1010',
 	'table contents as expected after recovery');
 $node->safe_psql('postgres', 'DROP TABLE truncme');
 done_testing();