mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-04 00:02:52 -05:00 
			
		
		
		
	Teach verify_heapam() to validate update chains within a page.
Prior to this commit, we only considered each tuple or line pointer on the page in isolation, but now we can do some validation of a line pointer against its successor. For example, a redirect line pointer shouldn't point to another redirect line pointer, and if a tuple is HOT-updated, the result should be a heap-only tuple. Himanshu Upadhyaya and Robert Haas, reviewed by Aleksander Alekseev, Andres Freund, and Peter Geoghegan.
This commit is contained in:
		
							parent
							
								
									d69c404c4c
								
							
						
					
					
						commit
						bbc1376b39
					
				@ -150,7 +150,9 @@ typedef struct HeapCheckContext
 | 
			
		||||
} HeapCheckContext;
 | 
			
		||||
 | 
			
		||||
/* Internal implementation */
 | 
			
		||||
static void check_tuple(HeapCheckContext *ctx);
 | 
			
		||||
static void check_tuple(HeapCheckContext *ctx,
 | 
			
		||||
						bool *xmin_commit_status_ok,
 | 
			
		||||
						XidCommitStatus *xmin_commit_status);
 | 
			
		||||
static void check_toast_tuple(HeapTuple toasttup, HeapCheckContext *ctx,
 | 
			
		||||
							  ToastedAttribute *ta, int32 *expected_chunk_seq,
 | 
			
		||||
							  uint32 extsize);
 | 
			
		||||
@ -160,7 +162,9 @@ static void check_toasted_attribute(HeapCheckContext *ctx,
 | 
			
		||||
									ToastedAttribute *ta);
 | 
			
		||||
 | 
			
		||||
static bool check_tuple_header(HeapCheckContext *ctx);
 | 
			
		||||
static bool check_tuple_visibility(HeapCheckContext *ctx);
 | 
			
		||||
static bool check_tuple_visibility(HeapCheckContext *ctx,
 | 
			
		||||
								   bool *xmin_commit_status_ok,
 | 
			
		||||
								   XidCommitStatus *xmin_commit_status);
 | 
			
		||||
 | 
			
		||||
static void report_corruption(HeapCheckContext *ctx, char *msg);
 | 
			
		||||
static void report_toast_corruption(HeapCheckContext *ctx,
 | 
			
		||||
@ -399,9 +403,16 @@ verify_heapam(PG_FUNCTION_ARGS)
 | 
			
		||||
	for (ctx.blkno = first_block; ctx.blkno <= last_block; ctx.blkno++)
 | 
			
		||||
	{
 | 
			
		||||
		OffsetNumber maxoff;
 | 
			
		||||
		OffsetNumber predecessor[MaxOffsetNumber];
 | 
			
		||||
		OffsetNumber successor[MaxOffsetNumber];
 | 
			
		||||
		bool		lp_valid[MaxOffsetNumber];
 | 
			
		||||
		bool		xmin_commit_status_ok[MaxOffsetNumber];
 | 
			
		||||
		XidCommitStatus	xmin_commit_status[MaxOffsetNumber];
 | 
			
		||||
 | 
			
		||||
		CHECK_FOR_INTERRUPTS();
 | 
			
		||||
 | 
			
		||||
		memset(predecessor, 0, sizeof(OffsetNumber) * MaxOffsetNumber);
 | 
			
		||||
 | 
			
		||||
		/* Optionally skip over all-frozen or all-visible blocks */
 | 
			
		||||
		if (skip_option != SKIP_PAGES_NONE)
 | 
			
		||||
		{
 | 
			
		||||
@ -433,6 +444,12 @@ verify_heapam(PG_FUNCTION_ARGS)
 | 
			
		||||
		for (ctx.offnum = FirstOffsetNumber; ctx.offnum <= maxoff;
 | 
			
		||||
			 ctx.offnum = OffsetNumberNext(ctx.offnum))
 | 
			
		||||
		{
 | 
			
		||||
			BlockNumber	nextblkno;
 | 
			
		||||
			OffsetNumber nextoffnum;
 | 
			
		||||
 | 
			
		||||
			successor[ctx.offnum] = InvalidOffsetNumber;
 | 
			
		||||
			lp_valid[ctx.offnum] = false;
 | 
			
		||||
			xmin_commit_status_ok[ctx.offnum] = false;
 | 
			
		||||
			ctx.itemid = PageGetItemId(ctx.page, ctx.offnum);
 | 
			
		||||
 | 
			
		||||
			/* Skip over unused/dead line pointers */
 | 
			
		||||
@ -469,6 +486,14 @@ verify_heapam(PG_FUNCTION_ARGS)
 | 
			
		||||
					report_corruption(&ctx,
 | 
			
		||||
									  psprintf("line pointer redirection to unused item at offset %u",
 | 
			
		||||
											   (unsigned) rdoffnum));
 | 
			
		||||
 | 
			
		||||
				/*
 | 
			
		||||
				 * Record the fact that this line pointer has passed basic
 | 
			
		||||
				 * sanity checking, and also the offset number to which it
 | 
			
		||||
				 * points.
 | 
			
		||||
				 */
 | 
			
		||||
				lp_valid[ctx.offnum] = true;
 | 
			
		||||
				successor[ctx.offnum] = rdoffnum;
 | 
			
		||||
				continue;
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
@ -502,11 +527,237 @@ verify_heapam(PG_FUNCTION_ARGS)
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			/* It should be safe to examine the tuple's header, at least */
 | 
			
		||||
			lp_valid[ctx.offnum] = true;
 | 
			
		||||
			ctx.tuphdr = (HeapTupleHeader) PageGetItem(ctx.page, ctx.itemid);
 | 
			
		||||
			ctx.natts = HeapTupleHeaderGetNatts(ctx.tuphdr);
 | 
			
		||||
 | 
			
		||||
			/* Ok, ready to check this next tuple */
 | 
			
		||||
			check_tuple(&ctx);
 | 
			
		||||
			check_tuple(&ctx,
 | 
			
		||||
						&xmin_commit_status_ok[ctx.offnum],
 | 
			
		||||
						&xmin_commit_status[ctx.offnum]);
 | 
			
		||||
 | 
			
		||||
			/*
 | 
			
		||||
			 * If the CTID field of this tuple seems to point to another tuple
 | 
			
		||||
			 * on the same page, record that tuple as the successor of this
 | 
			
		||||
			 * one.
 | 
			
		||||
			 */
 | 
			
		||||
			nextblkno = ItemPointerGetBlockNumber(&(ctx.tuphdr)->t_ctid);
 | 
			
		||||
			nextoffnum = ItemPointerGetOffsetNumber(&(ctx.tuphdr)->t_ctid);
 | 
			
		||||
			if (nextblkno == ctx.blkno && nextoffnum != ctx.offnum)
 | 
			
		||||
				successor[ctx.offnum] = nextoffnum;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		/*
 | 
			
		||||
		 * Update chain validation. Check each line pointer that's got a valid
 | 
			
		||||
		 * successor against that successor.
 | 
			
		||||
		 */
 | 
			
		||||
		ctx.attnum = -1;
 | 
			
		||||
		for (ctx.offnum = FirstOffsetNumber; ctx.offnum <= maxoff;
 | 
			
		||||
			 ctx.offnum = OffsetNumberNext(ctx.offnum))
 | 
			
		||||
		{
 | 
			
		||||
			ItemId		curr_lp;
 | 
			
		||||
			ItemId		next_lp;
 | 
			
		||||
			HeapTupleHeader curr_htup;
 | 
			
		||||
			HeapTupleHeader next_htup;
 | 
			
		||||
			TransactionId curr_xmin;
 | 
			
		||||
			TransactionId curr_xmax;
 | 
			
		||||
			TransactionId next_xmin;
 | 
			
		||||
			OffsetNumber nextoffnum = successor[ctx.offnum];
 | 
			
		||||
 | 
			
		||||
			/*
 | 
			
		||||
			 * The current line pointer may not have a successor, either
 | 
			
		||||
			 * because it's not valid or because it didn't point to anything.
 | 
			
		||||
			 * In either case, we have to give up.
 | 
			
		||||
			 *
 | 
			
		||||
			 * If the current line pointer does point to something, it's
 | 
			
		||||
			 * possible that the target line pointer isn't valid. We have to
 | 
			
		||||
			 * give up in that case, too.
 | 
			
		||||
			 */
 | 
			
		||||
			if (nextoffnum == InvalidOffsetNumber || !lp_valid[nextoffnum])
 | 
			
		||||
				continue;
 | 
			
		||||
 | 
			
		||||
			/* We have two valid line pointers that we can examine. */
 | 
			
		||||
			curr_lp = PageGetItemId(ctx.page, ctx.offnum);
 | 
			
		||||
			next_lp = PageGetItemId(ctx.page, nextoffnum);
 | 
			
		||||
 | 
			
		||||
			/* Handle the cases where the current line pointer is a redirect. */
 | 
			
		||||
			if (ItemIdIsRedirected(curr_lp))
 | 
			
		||||
			{
 | 
			
		||||
				/* Can't redirect to another redirect. */
 | 
			
		||||
				if (ItemIdIsRedirected(next_lp))
 | 
			
		||||
				{
 | 
			
		||||
					report_corruption(&ctx,
 | 
			
		||||
									  psprintf("redirected line pointer points to another redirected line pointer at offset %u",
 | 
			
		||||
											   (unsigned) nextoffnum));
 | 
			
		||||
					continue;
 | 
			
		||||
				}
 | 
			
		||||
 | 
			
		||||
				/* Can only redirect to a HOT tuple. */
 | 
			
		||||
				next_htup = (HeapTupleHeader) PageGetItem(ctx.page, next_lp);
 | 
			
		||||
				if (!HeapTupleHeaderIsHeapOnly(next_htup))
 | 
			
		||||
				{
 | 
			
		||||
					report_corruption(&ctx,
 | 
			
		||||
									  psprintf("redirected line pointer points to a non-heap-only tuple at offset %u",
 | 
			
		||||
											   (unsigned) nextoffnum));
 | 
			
		||||
				}
 | 
			
		||||
 | 
			
		||||
				/*
 | 
			
		||||
				 * Redirects are created by updates, so successor should be
 | 
			
		||||
				 * the result of an update.
 | 
			
		||||
				 */
 | 
			
		||||
				if ((next_htup->t_infomask & HEAP_UPDATED) == 0)
 | 
			
		||||
				{
 | 
			
		||||
					report_corruption(&ctx,
 | 
			
		||||
									  psprintf("redirected line pointer points to a non-heap-updated tuple at offset %u",
 | 
			
		||||
											   (unsigned) nextoffnum));
 | 
			
		||||
				}
 | 
			
		||||
 | 
			
		||||
				/* HOT chains should not intersect. */
 | 
			
		||||
				if (predecessor[nextoffnum] != InvalidOffsetNumber)
 | 
			
		||||
				{
 | 
			
		||||
					report_corruption(&ctx,
 | 
			
		||||
									  psprintf("redirect line pointer points to offset %u, but offset %u also points there",
 | 
			
		||||
											   (unsigned) nextoffnum, (unsigned) predecessor[nextoffnum]));
 | 
			
		||||
					continue;
 | 
			
		||||
				}
 | 
			
		||||
 | 
			
		||||
				/*
 | 
			
		||||
				 * This redirect and the tuple to which it points seem to be
 | 
			
		||||
				 * part of an update chain.
 | 
			
		||||
				 */
 | 
			
		||||
				predecessor[nextoffnum] = ctx.offnum;
 | 
			
		||||
				continue;
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			/*
 | 
			
		||||
			 * If the next line pointer is a redirect, or if it's a tuple
 | 
			
		||||
			 * but the XMAX of this tuple doesn't match the XMIN of the next
 | 
			
		||||
			 * tuple, then the two aren't part of the same update chain and
 | 
			
		||||
			 * there is nothing more to do.
 | 
			
		||||
			 */
 | 
			
		||||
			if (ItemIdIsRedirected(next_lp))
 | 
			
		||||
				continue;
 | 
			
		||||
			curr_htup = (HeapTupleHeader) PageGetItem(ctx.page, curr_lp);
 | 
			
		||||
			curr_xmax = HeapTupleHeaderGetUpdateXid(curr_htup);
 | 
			
		||||
			next_htup = (HeapTupleHeader) PageGetItem(ctx.page, next_lp);
 | 
			
		||||
			next_xmin = HeapTupleHeaderGetXmin(next_htup);
 | 
			
		||||
			if (!TransactionIdIsValid(curr_xmax) ||
 | 
			
		||||
				!TransactionIdEquals(curr_xmax, next_xmin))
 | 
			
		||||
				continue;
 | 
			
		||||
 | 
			
		||||
			/* HOT chains should not intersect. */
 | 
			
		||||
			if (predecessor[nextoffnum] != InvalidOffsetNumber)
 | 
			
		||||
			{
 | 
			
		||||
				report_corruption(&ctx,
 | 
			
		||||
								  psprintf("tuple points to new version at offset %u, but offset %u also points there",
 | 
			
		||||
										   (unsigned) nextoffnum, (unsigned) predecessor[nextoffnum]));
 | 
			
		||||
				continue;
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			/*
 | 
			
		||||
			 * This tuple and the tuple to which it points seem to be part
 | 
			
		||||
			 * of an update chain.
 | 
			
		||||
			 */
 | 
			
		||||
			predecessor[nextoffnum] = ctx.offnum;
 | 
			
		||||
 | 
			
		||||
			/*
 | 
			
		||||
			 * If the current tuple is marked as HOT-updated, then the next
 | 
			
		||||
			 * tuple should be marked as a heap-only tuple. Conversely, if the
 | 
			
		||||
			 * current tuple isn't marked as HOT-updated, then the next tuple
 | 
			
		||||
			 * shouldn't be marked as a heap-only tuple.
 | 
			
		||||
			 */
 | 
			
		||||
			if (!HeapTupleHeaderIsHotUpdated(curr_htup) &&
 | 
			
		||||
				HeapTupleHeaderIsHeapOnly(next_htup))
 | 
			
		||||
			{
 | 
			
		||||
				report_corruption(&ctx,
 | 
			
		||||
								  psprintf("non-heap-only update produced a heap-only tuple at offset %u",
 | 
			
		||||
										   (unsigned) nextoffnum));
 | 
			
		||||
			}
 | 
			
		||||
			if (HeapTupleHeaderIsHotUpdated(curr_htup) &&
 | 
			
		||||
				!HeapTupleHeaderIsHeapOnly(next_htup))
 | 
			
		||||
			{
 | 
			
		||||
				report_corruption(&ctx,
 | 
			
		||||
								  psprintf("heap-only update produced a non-heap only tuple at offset %u",
 | 
			
		||||
										   (unsigned) nextoffnum));
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			/*
 | 
			
		||||
			 * If the current tuple's xmin is still in progress but the
 | 
			
		||||
			 * successor tuple's xmin is committed, that's corruption.
 | 
			
		||||
			 *
 | 
			
		||||
			 * NB: We recheck the commit status of the current tuple's xmin
 | 
			
		||||
			 * here, because it might have committed after we checked it and
 | 
			
		||||
			 * before we checked the commit status of the successor tuple's
 | 
			
		||||
			 * xmin. This should be safe because the xmin itself can't have
 | 
			
		||||
			 * changed, only its commit status.
 | 
			
		||||
			 */
 | 
			
		||||
			curr_xmin = HeapTupleHeaderGetXmin(curr_htup);
 | 
			
		||||
			if (xmin_commit_status_ok[ctx.offnum] &&
 | 
			
		||||
				xmin_commit_status[ctx.offnum] == XID_IN_PROGRESS &&
 | 
			
		||||
				xmin_commit_status_ok[nextoffnum] &&
 | 
			
		||||
				xmin_commit_status[nextoffnum] == XID_COMMITTED &&
 | 
			
		||||
				TransactionIdIsInProgress(curr_xmin))
 | 
			
		||||
			{
 | 
			
		||||
				report_corruption(&ctx,
 | 
			
		||||
								  psprintf("tuple with in-progress xmin %u was updated to produce a tuple at offset %u with committed xmin %u",
 | 
			
		||||
										   (unsigned) curr_xmin,
 | 
			
		||||
										   (unsigned) ctx.offnum,
 | 
			
		||||
										   (unsigned) next_xmin));
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			/*
 | 
			
		||||
			 * If the current tuple's xmin is aborted but the successor tuple's
 | 
			
		||||
			 * xmin is in-progress or committed, that's corruption.
 | 
			
		||||
			 */
 | 
			
		||||
			if (xmin_commit_status_ok[ctx.offnum] &&
 | 
			
		||||
				xmin_commit_status[ctx.offnum] == XID_ABORTED &&
 | 
			
		||||
				xmin_commit_status_ok[nextoffnum])
 | 
			
		||||
			{
 | 
			
		||||
				if (xmin_commit_status[nextoffnum] == XID_IN_PROGRESS)
 | 
			
		||||
					report_corruption(&ctx,
 | 
			
		||||
									  psprintf("tuple with aborted xmin %u was updated to produce a tuple at offset %u with in-progress xmin %u",
 | 
			
		||||
											   (unsigned) curr_xmin,
 | 
			
		||||
											   (unsigned) ctx.offnum,
 | 
			
		||||
											   (unsigned) next_xmin));
 | 
			
		||||
				else if (xmin_commit_status[nextoffnum] == XID_COMMITTED)
 | 
			
		||||
					report_corruption(&ctx,
 | 
			
		||||
									  psprintf("tuple with aborted xmin %u was updated to produce a tuple at offset %u with committed xmin %u",
 | 
			
		||||
											   (unsigned) curr_xmin,
 | 
			
		||||
											   (unsigned) ctx.offnum,
 | 
			
		||||
											   (unsigned) next_xmin));
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		/*
 | 
			
		||||
		 * An update chain can start either with a non-heap-only tuple or with
 | 
			
		||||
		 * a redirect line pointer, but not with a heap-only tuple.
 | 
			
		||||
		 *
 | 
			
		||||
		 * (This check is in a separate loop because we need the predecessor
 | 
			
		||||
		 * array to be fully populated before we can perform it.)
 | 
			
		||||
		 */
 | 
			
		||||
		for (ctx.offnum = FirstOffsetNumber;
 | 
			
		||||
			 ctx.offnum <= maxoff;
 | 
			
		||||
			 ctx.offnum = OffsetNumberNext(ctx.offnum))
 | 
			
		||||
		{
 | 
			
		||||
			if (xmin_commit_status_ok[ctx.offnum] &&
 | 
			
		||||
				(xmin_commit_status[ctx.offnum] == XID_COMMITTED ||
 | 
			
		||||
				 xmin_commit_status[ctx.offnum] == XID_IN_PROGRESS) &&
 | 
			
		||||
				predecessor[ctx.offnum] == InvalidOffsetNumber)
 | 
			
		||||
			{
 | 
			
		||||
				ItemId		curr_lp;
 | 
			
		||||
 | 
			
		||||
				curr_lp = PageGetItemId(ctx.page, ctx.offnum);
 | 
			
		||||
				if (!ItemIdIsRedirected(curr_lp))
 | 
			
		||||
				{
 | 
			
		||||
					HeapTupleHeader curr_htup;
 | 
			
		||||
 | 
			
		||||
					curr_htup = (HeapTupleHeader)
 | 
			
		||||
						PageGetItem(ctx.page, curr_lp);
 | 
			
		||||
					if (HeapTupleHeaderIsHeapOnly(curr_htup))
 | 
			
		||||
						report_corruption(&ctx,
 | 
			
		||||
										  psprintf("tuple is root of chain but is marked as heap-only tuple"));
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		/* clean up */
 | 
			
		||||
@ -638,6 +889,7 @@ check_tuple_header(HeapCheckContext *ctx)
 | 
			
		||||
{
 | 
			
		||||
	HeapTupleHeader tuphdr = ctx->tuphdr;
 | 
			
		||||
	uint16		infomask = tuphdr->t_infomask;
 | 
			
		||||
	TransactionId curr_xmax = HeapTupleHeaderGetUpdateXid(tuphdr);
 | 
			
		||||
	bool		result = true;
 | 
			
		||||
	unsigned	expected_hoff;
 | 
			
		||||
 | 
			
		||||
@ -663,6 +915,19 @@ check_tuple_header(HeapCheckContext *ctx)
 | 
			
		||||
		 */
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (!TransactionIdIsValid(curr_xmax) &&
 | 
			
		||||
		HeapTupleHeaderIsHotUpdated(tuphdr))
 | 
			
		||||
	{
 | 
			
		||||
		report_corruption(ctx,
 | 
			
		||||
						  psprintf("tuple has been HOT updated, but xmax is 0"));
 | 
			
		||||
 | 
			
		||||
		/*
 | 
			
		||||
		 * As above, even though this shouldn't happen, it's not sufficient
 | 
			
		||||
		 * justification for skipping further checks, we should still be able
 | 
			
		||||
		 * to perform sensibly.
 | 
			
		||||
		 */
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (infomask & HEAP_HASNULL)
 | 
			
		||||
		expected_hoff = MAXALIGN(SizeofHeapTupleHeader + BITMAPLEN(ctx->natts));
 | 
			
		||||
	else
 | 
			
		||||
@ -718,9 +983,14 @@ check_tuple_header(HeapCheckContext *ctx)
 | 
			
		||||
 * Returns true if the tuple itself should be checked, false otherwise.  Sets
 | 
			
		||||
 * ctx->tuple_could_be_pruned if the tuple -- and thus also any associated
 | 
			
		||||
 * TOAST tuples -- are eligible for pruning.
 | 
			
		||||
 *
 | 
			
		||||
 * Sets *xmin_commit_status_ok to true if the commit status of xmin is known
 | 
			
		||||
 * and false otherwise. If it's set to true, then also set *xmin_commit_status
 | 
			
		||||
 * to the actual commit status.
 | 
			
		||||
 */
 | 
			
		||||
static bool
 | 
			
		||||
check_tuple_visibility(HeapCheckContext *ctx)
 | 
			
		||||
check_tuple_visibility(HeapCheckContext *ctx, bool *xmin_commit_status_ok,
 | 
			
		||||
					   XidCommitStatus *xmin_commit_status)
 | 
			
		||||
{
 | 
			
		||||
	TransactionId xmin;
 | 
			
		||||
	TransactionId xvac;
 | 
			
		||||
@ -731,13 +1001,17 @@ check_tuple_visibility(HeapCheckContext *ctx)
 | 
			
		||||
	HeapTupleHeader tuphdr = ctx->tuphdr;
 | 
			
		||||
 | 
			
		||||
	ctx->tuple_could_be_pruned = true;	/* have not yet proven otherwise */
 | 
			
		||||
	*xmin_commit_status_ok = false;		/* have not yet proven otherwise */
 | 
			
		||||
 | 
			
		||||
	/* If xmin is normal, it should be within valid range */
 | 
			
		||||
	xmin = HeapTupleHeaderGetXmin(tuphdr);
 | 
			
		||||
	switch (get_xid_status(xmin, ctx, &xmin_status))
 | 
			
		||||
	{
 | 
			
		||||
		case XID_INVALID:
 | 
			
		||||
			break;
 | 
			
		||||
		case XID_BOUNDS_OK:
 | 
			
		||||
			*xmin_commit_status_ok = true;
 | 
			
		||||
			*xmin_commit_status = xmin_status;
 | 
			
		||||
			break;
 | 
			
		||||
		case XID_IN_FUTURE:
 | 
			
		||||
			report_corruption(ctx,
 | 
			
		||||
@ -1515,9 +1789,13 @@ check_toasted_attribute(HeapCheckContext *ctx, ToastedAttribute *ta)
 | 
			
		||||
/*
 | 
			
		||||
 * Check the current tuple as tracked in ctx, recording any corruption found in
 | 
			
		||||
 * ctx->tupstore.
 | 
			
		||||
 *
 | 
			
		||||
 * We return some information about the status of xmin to aid in validating
 | 
			
		||||
 * update chains.
 | 
			
		||||
 */
 | 
			
		||||
static void
 | 
			
		||||
check_tuple(HeapCheckContext *ctx)
 | 
			
		||||
check_tuple(HeapCheckContext *ctx, bool *xmin_commit_status_ok,
 | 
			
		||||
			XidCommitStatus *xmin_commit_status)
 | 
			
		||||
{
 | 
			
		||||
	/*
 | 
			
		||||
	 * Check various forms of tuple header corruption, and if the header is
 | 
			
		||||
@ -1531,7 +1809,8 @@ check_tuple(HeapCheckContext *ctx)
 | 
			
		||||
	 * cannot assume our relation description matches the tuple structure, and
 | 
			
		||||
	 * therefore cannot check it.
 | 
			
		||||
	 */
 | 
			
		||||
	if (!check_tuple_visibility(ctx))
 | 
			
		||||
	if (!check_tuple_visibility(ctx, xmin_commit_status_ok,
 | 
			
		||||
							xmin_commit_status))
 | 
			
		||||
		return;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
 | 
			
		||||
@ -174,12 +174,16 @@ sub write_tuple
 | 
			
		||||
# Set umask so test directories and files are created with default permissions
 | 
			
		||||
umask(0077);
 | 
			
		||||
 | 
			
		||||
my $pred_xmax;
 | 
			
		||||
my $pred_posid;
 | 
			
		||||
my $aborted_xid;
 | 
			
		||||
# Set up the node.  Once we create and corrupt the table,
 | 
			
		||||
# autovacuum workers visiting the table could crash the backend.
 | 
			
		||||
# Disable autovacuum so that won't happen.
 | 
			
		||||
my $node = PostgreSQL::Test::Cluster->new('test');
 | 
			
		||||
$node->init;
 | 
			
		||||
$node->append_conf('postgresql.conf', 'autovacuum=off');
 | 
			
		||||
$node->append_conf('postgresql.conf', 'max_prepared_transactions=10');
 | 
			
		||||
 | 
			
		||||
# Start the node and load the extensions.  We depend on both
 | 
			
		||||
# amcheck and pageinspect for this test.
 | 
			
		||||
@ -216,8 +220,15 @@ my $rel = $node->safe_psql('postgres',
 | 
			
		||||
	qq(SELECT pg_relation_filepath('public.test')));
 | 
			
		||||
my $relpath = "$pgdata/$rel";
 | 
			
		||||
 | 
			
		||||
# Insert data and freeze public.test
 | 
			
		||||
my $ROWCOUNT = 17;
 | 
			
		||||
# Initial setup for the public.test table.
 | 
			
		||||
# $ROWCOUNT is the total number of rows that we expect to insert into the page.
 | 
			
		||||
# $ROWCOUNT_BASIC is the number of those rows that are related to basic
 | 
			
		||||
# tuple validation, rather than update chain validation.
 | 
			
		||||
my $ROWCOUNT = 44;
 | 
			
		||||
my $ROWCOUNT_BASIC = 16;
 | 
			
		||||
 | 
			
		||||
# First insert data needed for tests unrelated to update chain validation.
 | 
			
		||||
# Then freeze the page. These tuples are at offset numbers 1 to 16.
 | 
			
		||||
$node->safe_psql(
 | 
			
		||||
	'postgres', qq(
 | 
			
		||||
	INSERT INTO public.test (a, b, c)
 | 
			
		||||
@ -225,10 +236,86 @@ $node->safe_psql(
 | 
			
		||||
			x'DEADF9F9DEADF9F9'::bigint,
 | 
			
		||||
			'abcdefg',
 | 
			
		||||
			repeat('w', 10000)
 | 
			
		||||
        FROM generate_series(1, $ROWCOUNT);
 | 
			
		||||
	FROM generate_series(1, $ROWCOUNT_BASIC);
 | 
			
		||||
	VACUUM FREEZE public.test;)
 | 
			
		||||
);
 | 
			
		||||
 | 
			
		||||
# Create some simple HOT update chains for line pointer validation. After
 | 
			
		||||
# the page is HOT pruned, we'll have two redirect line pointers, each pointing
 | 
			
		||||
# to a tuple. We'll then change the second redirect to point to the same
 | 
			
		||||
# tuple as the first one and verify that we can detect corruption.
 | 
			
		||||
$node->safe_psql(
 | 
			
		||||
	'postgres', qq(
 | 
			
		||||
		INSERT INTO public.test (a, b, c)
 | 
			
		||||
			VALUES ( x'DEADF9F9DEADF9F9'::bigint, 'abcdefg',
 | 
			
		||||
					 generate_series(1,2)); -- offset numbers 17 and 18
 | 
			
		||||
		UPDATE public.test SET c = 'a' WHERE c = '1'; -- offset number 19
 | 
			
		||||
		UPDATE public.test SET c = 'a' WHERE c = '2'; -- offset number 20
 | 
			
		||||
	));
 | 
			
		||||
 | 
			
		||||
# Create some more HOT update chains.
 | 
			
		||||
$node->safe_psql(
 | 
			
		||||
	'postgres', qq(
 | 
			
		||||
		INSERT INTO public.test (a, b, c)
 | 
			
		||||
			VALUES ( x'DEADF9F9DEADF9F9'::bigint, 'abcdefg',
 | 
			
		||||
					 generate_series(3,6)); -- offset numbers 21 through 24
 | 
			
		||||
		UPDATE public.test SET c = 'a' WHERE c = '3'; -- offset number 25
 | 
			
		||||
		UPDATE public.test SET c = 'a' WHERE c = '4'; -- offset number 26
 | 
			
		||||
	));
 | 
			
		||||
 | 
			
		||||
# Negative test case of HOT-pruning with aborted tuple.
 | 
			
		||||
$node->safe_psql(
 | 
			
		||||
	'postgres', qq(
 | 
			
		||||
		BEGIN;
 | 
			
		||||
		UPDATE public.test SET c = 'a' WHERE c = '5'; -- offset number 27
 | 
			
		||||
		ABORT;
 | 
			
		||||
		VACUUM FREEZE public.test;
 | 
			
		||||
	));
 | 
			
		||||
 | 
			
		||||
# The next update of any tuple will be stored in the same place as the tuple inserted
 | 
			
		||||
# by aborted transaction. This should not cause the table to appear corrupt.
 | 
			
		||||
$node->safe_psql(
 | 
			
		||||
	'postgres', qq(
 | 
			
		||||
		UPDATE public.test SET c = 'a' WHERE c = '6'; -- offset number 27 again
 | 
			
		||||
		VACUUM FREEZE public.test;
 | 
			
		||||
	));
 | 
			
		||||
 | 
			
		||||
# Data for HOT chain validation, so not calling VACUUM FREEZE.
 | 
			
		||||
$node->safe_psql(
 | 
			
		||||
	'postgres', qq(
 | 
			
		||||
		INSERT INTO public.test (a, b, c)
 | 
			
		||||
			VALUES ( x'DEADF9F9DEADF9F9'::bigint, 'abcdefg',
 | 
			
		||||
					 generate_series(7,15)); -- offset numbers 28 to 36
 | 
			
		||||
		UPDATE public.test SET c = 'a' WHERE c = '7'; -- offset number 37
 | 
			
		||||
		UPDATE public.test SET c = 'a' WHERE c = '10'; -- offset number 38
 | 
			
		||||
		UPDATE public.test SET c = 'a' WHERE c = '11'; -- offset number 39
 | 
			
		||||
		UPDATE public.test SET c = 'a' WHERE c = '12'; -- offset number 40
 | 
			
		||||
		UPDATE public.test SET c = 'a' WHERE c = '13'; -- offset number 41
 | 
			
		||||
		UPDATE public.test SET c = 'a' WHERE c = '14'; -- offset number 42
 | 
			
		||||
		UPDATE public.test SET c = 'a' WHERE c = '15'; -- offset number 43
 | 
			
		||||
	));
 | 
			
		||||
 | 
			
		||||
# Need one aborted transaction to test corruption in HOT chains.
 | 
			
		||||
$node->safe_psql(
 | 
			
		||||
	'postgres', qq(
 | 
			
		||||
		BEGIN;
 | 
			
		||||
		UPDATE public.test SET c = 'a' WHERE c = '9'; -- offset number 44
 | 
			
		||||
		ABORT;
 | 
			
		||||
	));
 | 
			
		||||
 | 
			
		||||
# Need one in-progress transaction to test some corruption cases in HOT chains.
 | 
			
		||||
# We use PREPARE TRANSACTION here because a prepared transaction will not be aborted
 | 
			
		||||
# even if we stop the node.
 | 
			
		||||
$node->safe_psql(
 | 
			
		||||
	'postgres', qq(
 | 
			
		||||
		BEGIN;
 | 
			
		||||
		PREPARE TRANSACTION 'in_progress_tx';
 | 
			
		||||
	));
 | 
			
		||||
my $in_progress_xid = $node->safe_psql(
 | 
			
		||||
	'postgres', qq(
 | 
			
		||||
		SELECT transaction FROM pg_prepared_xacts;
 | 
			
		||||
	));
 | 
			
		||||
 | 
			
		||||
my $relfrozenxid = $node->safe_psql('postgres',
 | 
			
		||||
	q(select relfrozenxid from pg_class where relname = 'test'));
 | 
			
		||||
my $datfrozenxid = $node->safe_psql('postgres',
 | 
			
		||||
@ -245,11 +332,12 @@ if ($datfrozenxid <= 3 || $datfrozenxid >= $relfrozenxid)
 | 
			
		||||
	exit;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
# Find where each of the tuples is located on the page.
 | 
			
		||||
# Find where each of the tuples is located on the page. If a particular
 | 
			
		||||
# line pointer is a redirect rather than a tuple, we record the offset as -1.
 | 
			
		||||
my @lp_off = split '\n', $node->safe_psql(
 | 
			
		||||
	'postgres', qq(
 | 
			
		||||
	    select lp_off from heap_page_items(get_raw_page('test', 'main', 0))
 | 
			
		||||
		where lp <= $ROWCOUNT
 | 
			
		||||
	    SELECT CASE WHEN lp_flags = 2 THEN -1 ELSE lp_off END
 | 
			
		||||
	    FROM heap_page_items(get_raw_page('test', 'main', 0))
 | 
			
		||||
    )
 | 
			
		||||
);
 | 
			
		||||
is(scalar @lp_off, $ROWCOUNT, "acquired row offsets");
 | 
			
		||||
@ -268,6 +356,7 @@ for (my $tupidx = 0; $tupidx < $ROWCOUNT; $tupidx++)
 | 
			
		||||
{
 | 
			
		||||
	my $offnum = $tupidx + 1;        # offnum is 1-based, not zero-based
 | 
			
		||||
	my $offset = $lp_off[$tupidx];
 | 
			
		||||
	next if $offset == -1;			 # ignore redirect line pointers
 | 
			
		||||
	my $tup = read_tuple($file, $offset);
 | 
			
		||||
 | 
			
		||||
	# Sanity-check that the data appears on the page where we expect.
 | 
			
		||||
@ -280,7 +369,7 @@ for (my $tupidx = 0; $tupidx < $ROWCOUNT; $tupidx++)
 | 
			
		||||
		$node->clean_node;
 | 
			
		||||
		plan skip_all =>
 | 
			
		||||
		  sprintf(
 | 
			
		||||
			"Page layout differs from our expectations: expected (%x, %x, \"%s\"), got (%x, %x, \"%s\")",
 | 
			
		||||
			"Page layout of index %d differs from our expectations: expected (%x, %x, \"%s\"), got (%x, %x, \"%s\")", $tupidx,
 | 
			
		||||
			0xDEADF9F9, 0xDEADF9F9, "abcdefg", $a_1, $a_2, $b);
 | 
			
		||||
		exit;
 | 
			
		||||
	}
 | 
			
		||||
@ -315,6 +404,9 @@ use constant HEAP_XMAX_INVALID   => 0x0800;
 | 
			
		||||
use constant HEAP_NATTS_MASK     => 0x07FF;
 | 
			
		||||
use constant HEAP_XMAX_IS_MULTI  => 0x1000;
 | 
			
		||||
use constant HEAP_KEYS_UPDATED   => 0x2000;
 | 
			
		||||
use constant HEAP_HOT_UPDATED    => 0x4000;
 | 
			
		||||
use constant HEAP_ONLY_TUPLE     => 0x8000;
 | 
			
		||||
use constant HEAP_UPDATED        => 0x2000;
 | 
			
		||||
 | 
			
		||||
# Helper function to generate a regular expression matching the header we
 | 
			
		||||
# expect verify_heapam() to return given which fields we expect to be non-null.
 | 
			
		||||
@ -346,9 +438,11 @@ for (my $tupidx = 0; $tupidx < $ROWCOUNT; $tupidx++)
 | 
			
		||||
{
 | 
			
		||||
	my $offnum = $tupidx + 1;        # offnum is 1-based, not zero-based
 | 
			
		||||
	my $offset = $lp_off[$tupidx];
 | 
			
		||||
	my $tup = read_tuple($file, $offset);
 | 
			
		||||
 | 
			
		||||
	my $header = header(0, $offnum, undef);
 | 
			
		||||
 | 
			
		||||
	# Read tuple, if there is one.
 | 
			
		||||
	my $tup = $offset == -1 ? undef : read_tuple($file, $offset);
 | 
			
		||||
 | 
			
		||||
	if ($offnum == 1)
 | 
			
		||||
	{
 | 
			
		||||
		# Corruptly set xmin < relfrozenxid
 | 
			
		||||
@ -361,7 +455,7 @@ for (my $tupidx = 0; $tupidx < $ROWCOUNT; $tupidx++)
 | 
			
		||||
		push @expected,
 | 
			
		||||
		  qr/${header}xmin $xmin precedes relation freeze threshold 0:\d+/;
 | 
			
		||||
	}
 | 
			
		||||
	if ($offnum == 2)
 | 
			
		||||
	elsif ($offnum == 2)
 | 
			
		||||
	{
 | 
			
		||||
		# Corruptly set xmin < datfrozenxid
 | 
			
		||||
		my $xmin = 3;
 | 
			
		||||
@ -521,7 +615,137 @@ for (my $tupidx = 0; $tupidx < $ROWCOUNT; $tupidx++)
 | 
			
		||||
		push @expected,
 | 
			
		||||
		  qr/${$header}xmin ${xmin} equals or exceeds next valid transaction ID 0:\d+/;
 | 
			
		||||
	}
 | 
			
		||||
	write_tuple($file, $offset, $tup);
 | 
			
		||||
	elsif ($offnum == 17)
 | 
			
		||||
	{
 | 
			
		||||
		# at offnum 19 we will unset HEAP_ONLY_TUPLE and HEAP_UPDATED flags.
 | 
			
		||||
		die "offnum $offnum should be a redirect" if defined $tup;
 | 
			
		||||
		push @expected,
 | 
			
		||||
			qr/${header}redirected line pointer points to a non-heap-only tuple at offset \d+/;
 | 
			
		||||
		push @expected,
 | 
			
		||||
			qr/${header}redirected line pointer points to a non-heap-updated tuple at offset \d+/;
 | 
			
		||||
	}
 | 
			
		||||
	elsif ($offnum == 18)
 | 
			
		||||
	{
 | 
			
		||||
		# rewrite line pointer with lp_off = 17, lp_flags = 2, lp_len = 0.
 | 
			
		||||
		die "offnum $offnum should be a redirect" if defined $tup;
 | 
			
		||||
		sysseek($file, 92, 0) or BAIL_OUT("sysseek failed: $!");
 | 
			
		||||
		syswrite($file,
 | 
			
		||||
				 pack("L", $ENDIANNESS eq 'little' ? 0x00010011 : 0x11000100))
 | 
			
		||||
			or BAIL_OUT("syswrite failed: $!");
 | 
			
		||||
		push @expected,
 | 
			
		||||
		  qr/${header}redirected line pointer points to another redirected line pointer at offset \d+/;
 | 
			
		||||
	}
 | 
			
		||||
	elsif ($offnum == 19)
 | 
			
		||||
	{
 | 
			
		||||
		# unset HEAP_ONLY_TUPLE and HEAP_UPDATED flags, so that update chain
 | 
			
		||||
		# validation will complain about offset 17
 | 
			
		||||
		$tup->{t_infomask2} &= ~HEAP_ONLY_TUPLE;
 | 
			
		||||
		$tup->{t_infomask} &= ~HEAP_UPDATED;
 | 
			
		||||
	}
 | 
			
		||||
	elsif ($offnum == 22)
 | 
			
		||||
	{
 | 
			
		||||
		# rewrite line pointer with lp_off = 25, lp_flags = 2, lp_len = 0.
 | 
			
		||||
		sysseek($file, 108, 0) or BAIL_OUT("sysseek failed: $!");
 | 
			
		||||
		syswrite($file,
 | 
			
		||||
				 pack("L", $ENDIANNESS eq 'little' ? 0x00010019 : 0x19000100))
 | 
			
		||||
			or BAIL_OUT("syswrite failed: $!");
 | 
			
		||||
		push @expected,
 | 
			
		||||
		  qr/${header}redirect line pointer points to offset \d+, but offset \d+ also points there/;
 | 
			
		||||
	}
 | 
			
		||||
	elsif ($offnum == 28)
 | 
			
		||||
	{
 | 
			
		||||
		$tup->{t_infomask2} &= ~HEAP_HOT_UPDATED;
 | 
			
		||||
		push @expected,
 | 
			
		||||
		  qr/${header}non-heap-only update produced a heap-only tuple at offset \d+/;
 | 
			
		||||
 | 
			
		||||
		# Save these values so we can insert them into the tuple at offnum 29.
 | 
			
		||||
		$pred_xmax = $tup->{t_xmax};
 | 
			
		||||
		$pred_posid = $tup->{ip_posid};
 | 
			
		||||
	}
 | 
			
		||||
	elsif ($offnum == 29)
 | 
			
		||||
	{
 | 
			
		||||
		# Copy these values from the tuple at offset 28.
 | 
			
		||||
		$tup->{t_xmax} = $pred_xmax;
 | 
			
		||||
		$tup->{ip_posid} = $pred_posid;
 | 
			
		||||
		push @expected,
 | 
			
		||||
		  qr/${header}tuple points to new version at offset \d+, but offset \d+ also points there/;
 | 
			
		||||
	}
 | 
			
		||||
	elsif ($offnum == 30)
 | 
			
		||||
	{
 | 
			
		||||
		# Save xid, so we can insert it into the tuple at offset 31.
 | 
			
		||||
		$aborted_xid = $tup->{t_xmax};
 | 
			
		||||
	}
 | 
			
		||||
	elsif ($offnum == 31)
 | 
			
		||||
	{
 | 
			
		||||
		# Set xmin to xmax of tuple at offset 30.
 | 
			
		||||
		$tup->{t_xmin} = $aborted_xid;
 | 
			
		||||
		$tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED;
 | 
			
		||||
		push @expected,
 | 
			
		||||
		  qr/${header}tuple with aborted xmin \d+ was updated to produce a tuple at offset \d+ with committed xmin \d+/;
 | 
			
		||||
	}
 | 
			
		||||
	elsif ($offnum == 32)
 | 
			
		||||
	{
 | 
			
		||||
		$tup->{t_infomask2} |= HEAP_ONLY_TUPLE;
 | 
			
		||||
		push @expected,
 | 
			
		||||
		  qr/${header}tuple is root of chain but is marked as heap-only tuple/;
 | 
			
		||||
	}
 | 
			
		||||
	elsif ($offnum == 33)
 | 
			
		||||
	{
 | 
			
		||||
		# Tuple at offset 40 is the successor of this one; we'll corrupt it to
 | 
			
		||||
		# be non-heap-only.
 | 
			
		||||
		push @expected,
 | 
			
		||||
		  qr/${header}heap-only update produced a non-heap only tuple at offset \d+/;
 | 
			
		||||
	}
 | 
			
		||||
	elsif ($offnum == 34)
 | 
			
		||||
	{
 | 
			
		||||
		$tup->{t_xmax} = 0;
 | 
			
		||||
		push @expected,
 | 
			
		||||
		  qr/${header}tuple has been HOT updated, but xmax is 0/;
 | 
			
		||||
	}
 | 
			
		||||
	elsif ($offnum == 35)
 | 
			
		||||
	{
 | 
			
		||||
		$tup->{t_xmin} = $in_progress_xid;
 | 
			
		||||
		$tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED;
 | 
			
		||||
		push @expected,
 | 
			
		||||
		  qr/${header}tuple with in-progress xmin \d+ was updated to produce a tuple at offset \d+ with committed xmin \d+/;
 | 
			
		||||
	}
 | 
			
		||||
	elsif ($offnum == 36)
 | 
			
		||||
	{
 | 
			
		||||
		# Tuple at offset 43 is the successor of this one; we'll corrupt it to
 | 
			
		||||
		# have xmin = $in_progress_xid. By setting the xmax of this tuple to
 | 
			
		||||
		# the same value, we make it look like an update chain with an
 | 
			
		||||
		# in-progress XID following a committed one.
 | 
			
		||||
		$tup->{t_xmin} = $aborted_xid;
 | 
			
		||||
		$tup->{t_xmax} = $in_progress_xid;
 | 
			
		||||
		$tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED;
 | 
			
		||||
		push @expected,
 | 
			
		||||
		  qr/${header}tuple with aborted xmin \d+ was updated to produce a tuple at offset \d+ with in-progress xmin \d+/;
 | 
			
		||||
	}
 | 
			
		||||
	elsif ($offnum == 40)
 | 
			
		||||
	{
 | 
			
		||||
		# Tuple at offset 33 is the predecessor of this one; the error will
 | 
			
		||||
		# be reported there.
 | 
			
		||||
		$tup->{t_infomask2} &= ~HEAP_ONLY_TUPLE;
 | 
			
		||||
	}
 | 
			
		||||
	elsif ($offnum == 43)
 | 
			
		||||
	{
 | 
			
		||||
		# Tuple at offset 36 is the predecessor of this one; the error will
 | 
			
		||||
		# be reported there.
 | 
			
		||||
		$tup->{t_xmin} = $in_progress_xid;
 | 
			
		||||
		$tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED;
 | 
			
		||||
	}
 | 
			
		||||
	else
 | 
			
		||||
	{
 | 
			
		||||
		# The tests for update chain validation end up creating a bunch of
 | 
			
		||||
		# tuples that aren't corrupted in any way e.g. because only one of
 | 
			
		||||
		# the two tuples in the update chain needs to be corrupted for the
 | 
			
		||||
		# test, or because one update chain is being made to erroneously
 | 
			
		||||
		# point into the middle of another that has nothing wrong with it.
 | 
			
		||||
		# In all such cases we need not write the tuple back to the file.
 | 
			
		||||
		next;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	write_tuple($file, $offset, $tup) if defined $tup;
 | 
			
		||||
}
 | 
			
		||||
close($file)
 | 
			
		||||
  or BAIL_OUT("close failed: $!");
 | 
			
		||||
@ -532,6 +756,10 @@ $node->start;
 | 
			
		||||
$node->command_checks_all(
 | 
			
		||||
	[ 'pg_amcheck', '--no-dependent-indexes', '-p', $port, 'postgres' ],
 | 
			
		||||
	2, [@expected], [], 'Expected corruption message output');
 | 
			
		||||
$node->safe_psql(
 | 
			
		||||
        'postgres', qq(
 | 
			
		||||
                        COMMIT PREPARED 'in_progress_tx';
 | 
			
		||||
        ));
 | 
			
		||||
 | 
			
		||||
$node->teardown_node;
 | 
			
		||||
$node->clean_node;
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user