Compare commits


9 Commits

Tom Lane
5d0800000e Correctly copy the target host identification in PQcancelCreate.
PQcancelCreate failed to copy struct pg_conn_host's "type" field,
instead leaving it zero (a/k/a CHT_HOST_NAME).  This seemingly
has no great ill effects if it should have been CHT_UNIX_SOCKET
instead, but if it should have been CHT_HOST_ADDRESS then a
null-pointer dereference will occur when the cancelConn is used.

Bug: #18974
Reported-by: Maxim Boguk <maxim.boguk@gmail.com>
Author: Sergei Kornilov <sk@zsrv.org>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/18974-575f02b2168b36b3@postgresql.org
Backpatch-through: 17
2025-07-02 15:48:03 -04:00
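
A minimal C sketch of the kind of per-host copy involved (the struct layout and
helper below are illustrative approximations, not the actual libpq code or the
committed fix):

/*
 * Illustrative sketch only: every field of the per-host struct, including
 * "type", has to be copied when cloning host entries for a cancel
 * connection.  A zeroed enum field silently means CHT_HOST_NAME here.
 */
#include <stdlib.h>
#include <string.h>

typedef enum
{
	CHT_HOST_NAME,				/* enum value 0: what a missed copy defaults to */
	CHT_HOST_ADDRESS,
	CHT_UNIX_SOCKET
} pg_conn_host_type;

typedef struct pg_conn_host
{
	pg_conn_host_type type;		/* the field the bug left at zero */
	char	   *host;
	char	   *hostaddr;
	char	   *port;
} pg_conn_host;

static void
copy_conn_host(pg_conn_host *dst, const pg_conn_host *src)
{
	dst->type = src->type;		/* must not be skipped */
	dst->host = src->host ? strdup(src->host) : NULL;
	dst->hostaddr = src->hostaddr ? strdup(src->hostaddr) : NULL;
	dst->port = src->port ? strdup(src->port) : NULL;
}
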
Peter Geoghegan
4938737d54 Update obsolete row compare preprocessing comments.
Restore the nbtree preprocessing comments that describe how we mark nbtree
row compare members required to the way they read prior to 2016 bugfix
commit a298a1e0.

Oversight in commit bd3f59fd, which made nbtree preprocessing revert to
the original 2006 rules, but neglected to revert these comments.

Backpatch-through: 18
2025-07-02 12:36:34 -04:00
Álvaro Herrera
e16c9cd331
Fix error message for ALTER CONSTRAINT ... NOT VALID
Trying to alter a constraint so that it becomes NOT VALID results in an
error that assumes the constraint is a foreign key.  This is potentially
wrong, so give a more generic error message.

While at it, give CREATE CONSTRAINT TRIGGER a better error message as
well.

Co-authored-by: jian he <jian.universality@gmail.com>
Co-authored-by: Fujii Masao <masao.fujii@oss.nttdata.com>
Co-authored-by: Álvaro Herrera <alvherre@kurilemu.de>
Co-authored-by: Amul Sul <sulamul@gmail.com>
Discussion: https://postgr.es/m/CACJufxHSp2puxP=q8ZtUGL1F+heapnzqFBZy5ZNGUjUgwjBqTQ@mail.gmail.com
2025-07-02 17:02:27 +02:00
Peter Geoghegan
4cb889d21f Make row compares robust during nbtree array scans.
Recent nbtree bugfix commit 5f4d98d4 added a special case to the code
that sets up a page-level prefix of keys that are definitely satisfied
by every tuple on the page: whenever _bt_set_startikey reached a row
compare key, we'd refuse to apply the pstate.forcenonrequired behavior
in scans where that usually happens (scans with a higher-order array
key).  That hack made the scan avoid essentially the same infinite
cycling behavior that also affected nbtree scans with redundant keys
(keys that preprocessing could not eliminate) prior to commit f09816a0.
There are now serious doubts about this row compare workaround.

Testing has shown that a scan with a row compare key and an array key
could still read the same leaf page twice (without the scan's direction
changing), which isn't supposed to be possible following the SAOP
enhancements added by Postgres 17 commit 5bf748b8.  Also, we still
allowed a required row compare key to be used with forcenonrequired mode
when its header key happened to be beyond the pstate.ikey set by
_bt_set_startikey, which was complicated and brittle.

The underlying problem was that row compares had inconsistent rules
around how scans start (which keys can be used for initial positioning
purposes) and how scans end (which keys can set continuescan=false).
Quals with redundant keys that could not be eliminated by preprocessing
also had that same quality to them prior to today's bugfix f09816a0.  It
now seems prudent to bring row compare keys in line with the new charter
for required keys, by making the start and end rules symmetric.

This commit fixes two points of disagreement between _bt_first and
_bt_check_rowcompare.  Firstly, _bt_check_rowcompare was capable of
ending the scan at the point where it needed to compare an ISNULL-marked
row compare member that came immediately after a required row compare
member.  _bt_first now has symmetric handling for NULL row compares.
Secondly, _bt_first had its own ideas about which keys were safe to use
for initial positioning purposes.  It could use fewer or more keys than
_bt_check_rowcompare.  _bt_first now uses the same requiredness markings
as _bt_check_rowcompare for this.

Now that _bt_first and _bt_check_rowcompare agree on how to start and
end scans, we can get rid of the forcenonrequired special case, without
any risk of infinite cycling.  This approach also makes row compare keys
behave more like regular scalar keys, particularly within _bt_first.

Fixing these inconsistencies necessitates dealing with a related issue
with the way that row compares were marked required by preprocessing: we
didn't mark any lower-order row members required following 2016 bugfix
commit a298a1e0.  That approach was overly broad.  The bug in question was
actually an oversight in how _bt_check_rowcompare dealt with tuple NULL
values that failed to satisfy a scan key marked required in the opposite
scan direction (it was a bug in 2011 commits 6980f817 and 882368e8, not
a bug in 2006 commit 3a0a16cb).  Go back to marking row compare members
as required using the original 2006 rules, and fix the 2016 bug in a
more principled way: by limiting use of the "set continuescan=false with
a key required in the opposite scan direction upon encountering a NULL
tuple value" optimization to the first/most significant row member key.
While it isn't safe to use an implied IS NOT NULL qualifier to end the
scan when it comes from a required lower-order row compare member key,
it _is_ generally safe for such a required member key to end the scan --
provided the key is marked required in the _current_ scan direction.

This fixes what was arguably an oversight in either commit 5f4d98d4 or
commit 8a510275.  It is a direct follow-up to today's commit f09816a0.

Author: Peter Geoghegan <pg@bowt.ie>
Reviewed-By: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Discussion: https://postgr.es/m/CAH2-Wz=pcijHL_mA0_TJ5LiTB28QpQ0cGtT-ccFV=KzuunNDDQ@mail.gmail.com
Backpatch-through: 18
2025-07-02 09:48:14 -04:00
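
A hypothetical toy predicate (not nbtree source code) restating the NULL
handling rule described above for row compare member keys:

/*
 * Hypothetical restatement of the rule above, assuming nothing beyond what
 * the commit message states: when a row compare member hits a NULL tuple
 * value, only the first/most significant member may end the scan via a
 * marking that is required in the opposite scan direction (the implied
 * IS NOT NULL deduction); lower-order members may end the scan only when
 * marked required in the current scan direction.
 */
#include <stdbool.h>

static bool
null_member_value_ends_scan(int member_index,
							bool required_current_dir,
							bool required_opposite_dir)
{
	if (member_index == 0)
		return required_current_dir || required_opposite_dir;
	return required_current_dir;
}
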
Peter Geoghegan
7c365eb504 Make handling of redundant nbtree keys more robust.
nbtree preprocessing's handling of redundant (and contradictory) keys
created problems for scans with = arrays.  It was just about possible
for a scan with an = array key and one or more redundant keys (keys that
preprocessing could not eliminate due to an incomplete opfamily and a
cross-type key) to get stuck.  Testing has shown that infinite cycling
where the scan never manages to make forward progress was possible.
This could happen when the scan's arrays were reset in _bt_readpage's
forcenonrequired=true path (added by bugfix commit 5f4d98d4) when the
arrays weren't at least advanced up to the same point that they were in
at the start of the _bt_readpage call.  Earlier redundant keys prevented
the finaltup call to _bt_advance_array_keys from reaching lower-order
keys that needed to be used to sufficiently advance the scan's arrays.

To fix, make preprocessing leave the scan's keys in a state that is as
close as possible to how it'll usually leave them (in the common case
where there's no redundant keys that preprocessing failed to eliminate).
Now nbtree preprocessing _reliably_ leaves behind at most one required
>/>= key per index column, and at most one required </<= key per index
column.  Columns that have one or more = keys that are eligible to be
marked required (based on the traditional rules) prioritize the = keys
over redundant inequality keys; they'll _reliably_ be left with only one
of the = keys as the index column's only required key.

Keys that are not marked required (whether due to the new preprocessing
step running or for some other reason) are relocated to the end of the
so->keyData[] array as needed.  That way they'll always be evaluated
after the scan's required keys, and so cannot prevent code in places
like _bt_advance_array_keys and _bt_first from reaching a required key.

Also teach _bt_first to decide which initial positioning keys to use
based on the same requiredness markings that have long been used by
_bt_checkkeys/_bt_advance_array_keys.  This is a necessary condition for
reliably avoiding infinite cycling.  _bt_advance_array_keys expects to
be able to reason about what'll happen in the next _bt_first call should
it start another primitive index scan, by evaluating inequality keys
that were marked required in the opposite-to-scan scan direction only.
Now everybody (_bt_first, _bt_checkkeys, and _bt_advance_array_keys)
will always agree on which exact key will be used on each index column
to start and/or end the scan (except when row compare keys are involved,
which have similar problems not addressed by this commit).

An upcoming commit will finish off the work started by this commit by
harmonizing how _bt_first, _bt_checkkeys, and _bt_advance_array_keys
apply row compare keys to start and end scans.

This fixes what was arguably an oversight in either commit 5f4d98d4 or
commit 8a510275.

Author: Peter Geoghegan <pg@bowt.ie>
Reviewed-By: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Discussion: https://postgr.es/m/CAH2-Wz=ds4M+3NXMgwxYxqU8MULaLf696_v5g=9WNmWL2=Uo2A@mail.gmail.com
Backpatch-through: 18
2025-07-02 09:40:48 -04:00
Daniel Gustafsson
87f0d3cd8d doc: pg_buffercache documentation wordsmithing
A few words seemed to have gone missing in the leading paragraphs.

Author: Bertrand Drouvot <bertranddrouvot.pg@gmail.com>
Co-authored-by: Daniel Gustafsson <daniel@yesql.se>
Discussion: https://postgr.es/m/aGTQYZz9L0bjlzVL@ip-10-97-1-34.eu-west-3.compute.internal
Backpatch-through: 18
2025-07-02 11:42:36 +02:00
Masahiko Sawada
7c6ededac8 Fix missing FSM vacuum opportunities on tables without indexes.
Commit c120550edb86 optimized the vacuuming of relations without
indexes (a.k.a. one-pass strategy) by directly marking dead item IDs
as LP_UNUSED. However, the periodic FSM vacuum was still checking if
dead item IDs had been marked as LP_DEAD when attempting to vacuum the
FSM every VACUUM_FSM_EVERY_PAGES blocks. This condition was never met
due to the optimization, resulting in missed FSM vacuum
opportunities.

This commit modifies the periodic FSM vacuum condition to use the
number of tuples deleted during HOT pruning. This count includes items
marked as either LP_UNUSED or LP_REDIRECT, both of which are expected
to result in new free space to report.

Back-patch to v17 where the vacuum optimization for tables with no
indexes was introduced.

Reviewed-by: Melanie Plageman <melanieplageman@gmail.com>
Discussion: https://postgr.es/m/CAD21AoBL8m6B9GSzQfYxVaEgvD7-Kr3AJaS-hJPHC+avm-29zw@mail.gmail.com
Backpatch-through: 17
2025-07-01 23:25:17 -07:00
John Naylor
3e73d87353 Remove implicit cast from 'void *'
Commit e2809e3a101 added code to a header that assigns a pointer to void
to a pointer to unsigned char.  This causes build errors for extensions
written in C++.  Fix by adding an explicit cast.

Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/CANWCAZaCq9AHBuhs%3DMx7Gg_0Af9oRU7iAqr0itJCtfmsWwVmnQ%40mail.gmail.com
Backpatch-through: 18
2025-07-02 11:51:53 +07:00
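
A self-contained illustration of the C/C++ difference at issue (hypothetical
helper, not the header touched by the commit):

/*
 * C permits the implicit conversion from "void *"; C++ rejects it.  Headers
 * meant to be compiled by C++ extensions therefore need the explicit cast.
 */
#include <stdlib.h>

static inline unsigned char *
alloc_buffer(size_t len)
{
	/* "unsigned char *buf = malloc(len);" compiles as C but fails as C++ */
	unsigned char *buf = (unsigned char *) malloc(len);

	return buf;
}
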
Michael Paquier
d09d137934 Fix bug in archive streamer with LZ4 decompression
When decompressing some input data, the calculations for the initial
starting point and the initial size were incorrect, potentially leading
to failures when decompressing contents with LZ4.  These initialization
points are fixed in this commit, bringing the logic closer to what
exists for gzip and zstd.

The compressed data itself is not corrupted (for example, backups taken
with LZ4 can still be decompressed with a "lz4" command); only the
decompression code path reading the input data was impacted by this
issue.

This code path impacts pg_basebackup and pg_verifybackup, which can use
the LZ4 decompression routines with an archive streamer, or any tools
that try to use the archive streamers in src/fe_utils/.

The issue is easier to reproduce with files that have a low compression
rate, like ones filled with random data, for a size of at least 512kB,
but it could happen with anything as long as it is stored in a data
folder.  Some tests are added based on this idea, with a file filled
with random bytes grabbed from the backend, written at the root of the
data folder.  This proved good enough to reproduce the original
problem.

Author: Mikhail Gribkov <youzhick@gmail.com>
Discussion: https://postgr.es/m/CAMEv5_uQS1Hg6KCaEP2JkrTBbZ-nXQhxomWrhYQvbdzR-zy-wA@mail.gmail.com
Backpatch-through: 15
2025-07-02 13:48:41 +09:00
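
A hedged sketch of the decompression-loop bookkeeping described above, using
the LZ4 frame API directly (the function and buffer handling are illustrative,
not pg_basebackup's archive streamer code; error handling is omitted):

/*
 * When LZ4F_decompress() does not consume an entire input chunk, the next
 * call must start at the first unconsumed byte, with the remaining length.
 * Getting either of those wrong is the kind of initialization problem the
 * commit message describes.
 */
#include <stddef.h>
#include <lz4frame.h>

static void
feed_chunk(LZ4F_dctx *dctx, const char *input, size_t input_len,
		   char *out, size_t out_capacity)
{
	size_t		consumed = 0;

	while (consumed < input_len)
	{
		size_t		src_size = input_len - consumed;	/* bytes still unread */
		size_t		dst_size = out_capacity;

		/* read from the first unconsumed byte, never from input[0] again */
		(void) LZ4F_decompress(dctx, out, &dst_size,
							   input + consumed, &src_size, NULL);

		consumed += src_size;	/* LZ4F_decompress() sets this to bytes read */
		/* ... hand off dst_size bytes of decompressed output here ... */
	}
}
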
16 changed files with 950 additions and 514 deletions


@@ -37,12 +37,12 @@
  <para>
   This module provides the <function>pg_buffercache_pages()</function>
-  function (wrapped in the <structname>pg_buffercache</structname> view),
+  function (wrapped in the <structname>pg_buffercache</structname> view), the
   <function>pg_buffercache_numa_pages()</function> function (wrapped in the
   <structname>pg_buffercache_numa</structname> view), the
   <function>pg_buffercache_summary()</function> function, the
   <function>pg_buffercache_usage_counts()</function> function, the
-  <function>pg_buffercache_evict()</function>, the
+  <function>pg_buffercache_evict()</function> function, the
   <function>pg_buffercache_evict_relation()</function> function and the
   <function>pg_buffercache_evict_all()</function> function.
  </para>
@@ -55,7 +55,7 @@
  </para>

  <para>
-  The <function>pg_buffercache_numa_pages()</function> provides
+  The <function>pg_buffercache_numa_pages()</function> function provides
   <acronym>NUMA</acronym> node mappings for shared buffer entries.  This
   information is not part of <function>pg_buffercache_pages()</function>
   itself, as it is much slower to retrieve.


@@ -431,7 +431,7 @@ static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis);
 static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
								   BlockNumber blkno, Page page,
								   bool sharelock, Buffer vmbuffer);
-static void lazy_scan_prune(LVRelState *vacrel, Buffer buf,
+static int	lazy_scan_prune(LVRelState *vacrel, Buffer buf,
							BlockNumber blkno, Page page,
							Buffer vmbuffer, bool all_visible_according_to_vm,
							bool *has_lpdead_items, bool *vm_page_frozen);
@@ -1245,6 +1245,7 @@ lazy_scan_heap(LVRelState *vacrel)
 		Buffer		buf;
 		Page		page;
 		uint8		blk_info = 0;
+		int			ndeleted = 0;
 		bool		has_lpdead_items;
 		void	   *per_buffer_data = NULL;
 		bool		vm_page_frozen = false;
@@ -1387,10 +1388,10 @@ lazy_scan_heap(LVRelState *vacrel)
 		 * line pointers previously marked LP_DEAD.
 		 */
 		if (got_cleanup_lock)
-			lazy_scan_prune(vacrel, buf, blkno, page,
-							vmbuffer,
-							blk_info & VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM,
-							&has_lpdead_items, &vm_page_frozen);
+			ndeleted = lazy_scan_prune(vacrel, buf, blkno, page,
+									   vmbuffer,
+									   blk_info & VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM,
+									   &has_lpdead_items, &vm_page_frozen);

 		/*
 		 * Count an eagerly scanned page as a failure or a success.
@@ -1481,7 +1482,7 @@ lazy_scan_heap(LVRelState *vacrel)
 		 * table has indexes.  There will only be newly-freed space if we
 		 * held the cleanup lock and lazy_scan_prune() was called.
 		 */
-		if (got_cleanup_lock && vacrel->nindexes == 0 && has_lpdead_items &&
+		if (got_cleanup_lock && vacrel->nindexes == 0 && ndeleted > 0 &&
 			blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
 		{
 			FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
@@ -1936,8 +1937,10 @@ cmpOffsetNumbers(const void *a, const void *b)
  * *vm_page_frozen is set to true if the page is newly set all-frozen in the
  * VM.  The caller currently only uses this for determining whether an eagerly
  * scanned page was successfully set all-frozen.
+ *
+ * Returns the number of tuples deleted from the page during HOT pruning.
  */
-static void
+static int
 lazy_scan_prune(LVRelState *vacrel,
 				Buffer buf,
 				BlockNumber blkno,
@@ -2208,6 +2211,8 @@ lazy_scan_prune(LVRelState *vacrel,
 			*vm_page_frozen = true;
 		}
 	}
+
+	return presult.ndeleted;
 }

 /*


@@ -16,6 +16,7 @@
 #include "postgres.h"

 #include "access/nbtree.h"
+#include "common/int.h"
 #include "lib/qunique.h"
 #include "utils/array.h"
 #include "utils/lsyscache.h"
@@ -56,6 +57,8 @@ static void _bt_skiparray_strat_decrement(IndexScanDesc scan, ScanKey arraysk,
										  BTArrayKeyInfo *array);
 static void _bt_skiparray_strat_increment(IndexScanDesc scan, ScanKey arraysk,
										  BTArrayKeyInfo *array);
+static void _bt_unmark_keys(IndexScanDesc scan, int *keyDataMap);
+static int	_bt_reorder_array_cmp(const void *a, const void *b);
 static ScanKey _bt_preprocess_array_keys(IndexScanDesc scan, int *new_numberOfKeys);
 static void _bt_preprocess_array_keys_final(IndexScanDesc scan, int *keyDataMap);
 static int	_bt_num_array_keys(IndexScanDesc scan, Oid *skip_eq_ops_out,
@@ -96,7 +99,7 @@ static int	_bt_compare_array_elements(const void *a, const void *b, void *arg);
  * incomplete sets of cross-type operators, we may fail to detect redundant
  * or contradictory keys, but we can survive that.)
  *
- * The output keys must be sorted by index attribute.  Presently we expect
+ * Required output keys are sorted by index attribute.  Presently we expect
  * (but verify) that the input keys are already so sorted --- this is done
  * by match_clauses_to_index() in indxpath.c.  Some reordering of the keys
  * within each attribute may be done as a byproduct of the processing here.
@@ -127,29 +130,36 @@ static int	_bt_compare_array_elements(const void *a, const void *b, void *arg);
  * This has the potential to be much more efficient than a full index scan
  * (though it behaves like a full scan when there's many distinct "x" values).
  *
- * If possible, redundant keys are eliminated: we keep only the tightest
+ * Typically, redundant keys are eliminated: we keep only the tightest
  * >/>= bound and the tightest </<= bound, and if there's an = key then
  * that's the only one returned.  (So, we return either a single = key,
  * or one or two boundary-condition keys for each attr.)  However, if we
  * cannot compare two keys for lack of a suitable cross-type operator,
- * we cannot eliminate either.  If there are two such keys of the same
- * operator strategy, the second one is just pushed into the output array
- * without further processing here.  We may also emit both >/>= or both
- * </<= keys if we can't compare them.  The logic about required keys still
- * works if we don't eliminate redundant keys.
+ * we cannot eliminate either key.
  *
- * Note that one reason we need direction-sensitive required-key flags is
- * precisely that we may not be able to eliminate redundant keys.  Suppose
- * we have "x > 4::int AND x > 10::bigint", and we are unable to determine
- * which key is more restrictive for lack of a suitable cross-type operator.
- * _bt_first will arbitrarily pick one of the keys to do the initial
- * positioning with.  If it picks x > 4, then the x > 10 condition will fail
- * until we reach index entries > 10; but we can't stop the scan just because
- * x > 10 is failing.  On the other hand, if we are scanning backwards, then
- * failure of either key is indeed enough to stop the scan.  (In general, when
- * inequality keys are present, the initial-positioning code only promises to
- * position before the first possible match, not exactly at the first match,
- * for a forward scan; or after the last match for a backward scan.)
+ * When all redundant keys could not be eliminated, we'll output a key array
+ * that can more or less be treated as if it had no redundant keys.  Suppose
+ * we have "x > 4::int AND x > 10::bigint AND x < 70", and we are unable to
+ * determine which > key is more restrictive for lack of a suitable cross-type
+ * operator.  We'll arbitrarily pick one of the > keys; the other > key won't
+ * be marked required.  Obviously, the scan will be less efficient if we
+ * choose x > 4 over x > 10 -- but it can still largely proceed as if there
+ * was only a single > condition.  "x > 10" will be placed at the end of the
+ * so->keyData[] output array.  It'll always be evaluated last, after the keys
+ * that could be marked required in the usual way (after "x > 4 AND x < 70").
+ * This can sometimes result in so->keyData[] keys that aren't even in index
+ * attribute order (if the qual involves multiple attributes).  The scan's
+ * required keys will still be in attribute order, though, so it can't matter.
+ *
+ * This scheme ensures that _bt_first always uses the same set of keys at the
+ * start of a forwards scan as those _bt_checkkeys uses to determine when to
+ * end a similar backwards scan (and vice-versa).  _bt_advance_array_keys
+ * depends on this: it expects to be able to reliably predict what the next
+ * _bt_first call will do by testing whether _bt_checkkeys' routines report
+ * that the final tuple on the page is past the end of matches for the scan's
+ * keys with the scan direction flipped.  If it is (if continuescan=false),
+ * then it follows that calling _bt_first will, at a minimum, relocate the
+ * scan to the very next leaf page (in the current scan direction).
  *
  * As a byproduct of this work, we can detect contradictory quals such
  * as "x = 1 AND x > 2".  If we see that, we return so->qual_ok = false,
@@ -188,7 +198,8 @@ _bt_preprocess_keys(IndexScanDesc scan)
	int			numberOfEqualCols;
	ScanKey		inkeys;
	BTScanKeyPreproc xform[BTMaxStrategyNumber];
-	bool		test_result;
+	bool		test_result,
+				redundant_key_kept = false;
	AttrNumber	attno;
	ScanKey		arrayKeyData;
	int		   *keyDataMap = NULL;
@@ -388,7 +399,8 @@ _bt_preprocess_keys(IndexScanDesc scan)
					xform[j].inkey = NULL;
					xform[j].inkeyi = -1;
				}
-				/* else, cannot determine redundancy, keep both keys */
+				else
+					redundant_key_kept = true;
			}
			/* track number of attrs for which we have "=" keys */
			numberOfEqualCols++;
@@ -409,6 +421,8 @@ _bt_preprocess_keys(IndexScanDesc scan)
				else
					xform[BTLessStrategyNumber - 1].inkey = NULL;
			}
+			else
+				redundant_key_kept = true;
		}

		/* try to keep only one of >, >= */
@@ -426,6 +440,8 @@ _bt_preprocess_keys(IndexScanDesc scan)
				else
					xform[BTGreaterStrategyNumber - 1].inkey = NULL;
			}
+			else
+				redundant_key_kept = true;
		}

		/*
@@ -466,25 +482,6 @@ _bt_preprocess_keys(IndexScanDesc scan)
		/* check strategy this key's operator corresponds to */
		j = inkey->sk_strategy - 1;

-		/* if row comparison, push it directly to the output array */
-		if (inkey->sk_flags & SK_ROW_HEADER)
-		{
-			ScanKey		outkey = &so->keyData[new_numberOfKeys++];
-
-			memcpy(outkey, inkey, sizeof(ScanKeyData));
-			if (arrayKeyData)
-				keyDataMap[new_numberOfKeys - 1] = i;
-			if (numberOfEqualCols == attno - 1)
-				_bt_mark_scankey_required(outkey);
-
-			/*
-			 * We don't support RowCompare using equality; such a qual would
-			 * mess up the numberOfEqualCols tracking.
-			 */
-			Assert(j != (BTEqualStrategyNumber - 1));
-			continue;
-		}
-
		if (inkey->sk_strategy == BTEqualStrategyNumber &&
			(inkey->sk_flags & SK_SEARCHARRAY))
		{
@@ -593,9 +590,8 @@ _bt_preprocess_keys(IndexScanDesc scan)
				 * the new scan key.
				 *
				 * Note: We do things this way around so that our arrays are
-				 * always in the same order as their corresponding scan keys,
-				 * even with incomplete opfamilies.  _bt_advance_array_keys
-				 * depends on this.
+				 * always in the same order as their corresponding scan keys.
+				 * _bt_preprocess_array_keys_final expects this.
				 */
				ScanKey		outkey = &so->keyData[new_numberOfKeys++];
@@ -607,6 +603,7 @@ _bt_preprocess_keys(IndexScanDesc scan)
				xform[j].inkey = inkey;
				xform[j].inkeyi = i;
				xform[j].arrayidx = arrayidx;
+				redundant_key_kept = true;
			}
		}
	}
@@ -622,6 +619,15 @@ _bt_preprocess_keys(IndexScanDesc scan)
	if (arrayKeyData)
		_bt_preprocess_array_keys_final(scan, keyDataMap);

+	/*
+	 * If there are remaining redundant inequality keys, we must make sure
+	 * that each index attribute has no more than one required >/>= key, and
+	 * no more than one required </<= key.  Attributes that have one or more
+	 * required = keys now must keep only one required key (the first = key).
+	 */
+	if (unlikely(redundant_key_kept) && so->qual_ok)
+		_bt_unmark_keys(scan, keyDataMap);
+
	/* Could pfree arrayKeyData/keyDataMap now, but not worth the cycles */
 }
@@ -746,9 +752,12 @@ _bt_fix_scankey_strategy(ScanKey skey, int16 *indoption)
  *
  * Depending on the operator type, the key may be required for both scan
  * directions or just one.  Also, if the key is a row comparison header,
- * we have to mark its first subsidiary ScanKey as required.  (Subsequent
- * subsidiary ScanKeys are normally for lower-order columns, and thus
- * cannot be required, since they're after the first non-equality scankey.)
+ * we have to mark the appropriate subsidiary ScanKeys as required.  In such
+ * cases, the first subsidiary key is required, but subsequent ones are
+ * required only as long as they correspond to successive index columns and
+ * match the leading column as to sort direction.  Otherwise the row
+ * comparison ordering is different from the index ordering and so we can't
+ * stop the scan on the basis of those lower-order columns.
  *
  * Note: when we set required-key flag bits in a subsidiary scankey, we are
  * scribbling on a data structure belonging to the index AM's caller, not on
@@ -786,12 +795,25 @@ _bt_mark_scankey_required(ScanKey skey)
	if (skey->sk_flags & SK_ROW_HEADER)
	{
		ScanKey		subkey = (ScanKey) DatumGetPointer(skey->sk_argument);
+		AttrNumber	attno = skey->sk_attno;

		/* First subkey should be same column/operator as the header */
-		Assert(subkey->sk_flags & SK_ROW_MEMBER);
-		Assert(subkey->sk_attno == skey->sk_attno);
+		Assert(subkey->sk_attno == attno);
		Assert(subkey->sk_strategy == skey->sk_strategy);
-		subkey->sk_flags |= addflags;
+
+		for (;;)
+		{
+			Assert(subkey->sk_flags & SK_ROW_MEMBER);
+			if (subkey->sk_attno != attno)
+				break;			/* non-adjacent key, so not required */
+			if (subkey->sk_strategy != skey->sk_strategy)
+				break;			/* wrong direction, so not required */
+			subkey->sk_flags |= addflags;
+			if (subkey->sk_flags & SK_ROW_END)
+				break;
+			subkey++;
+			attno++;
+		}
	}
 }
@@ -847,8 +869,7 @@ _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
				cmp_op;
	StrategyNumber strat;

-	Assert(!((leftarg->sk_flags | rightarg->sk_flags) &
-			 (SK_ROW_HEADER | SK_ROW_MEMBER)));
+	Assert(!((leftarg->sk_flags | rightarg->sk_flags) & SK_ROW_MEMBER));

	/*
	 * First, deal with cases where one or both args are NULL.  This should
@@ -924,6 +945,16 @@ _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
		return true;
	}

+	/*
+	 * We don't yet know how to determine redundancy when it involves a row
+	 * compare key (barring simple cases involving IS NULL/IS NOT NULL)
+	 */
+	if ((leftarg->sk_flags | rightarg->sk_flags) & SK_ROW_HEADER)
+	{
+		Assert(!((leftarg->sk_flags | rightarg->sk_flags) & SK_BT_SKIP));
+		return false;
+	}
+
	/*
	 * If either leftarg or rightarg are equality-type array scankeys, we need
	 * specialized handling (since by now we know that IS NULL wasn't used)
@@ -1467,6 +1498,283 @@ _bt_skiparray_strat_increment(IndexScanDesc scan, ScanKey arraysk,
	}
 }
/*
* _bt_unmark_keys() -- make superfluous required keys nonrequired after all
*
* When _bt_preprocess_keys fails to eliminate one or more redundant keys, it
* calls here to make sure that no index attribute has more than one > or >=
* key marked required, and no more than one required < or <= key. Attributes
* with = keys will always get one = key as their required key. All other
* keys that were initially marked required get "unmarked" here. That way,
* _bt_first and _bt_checkkeys will reliably agree on which keys to use to
* start and/or to end the scan.
*
* We also relocate keys that become/started out nonrequired to the end of
* so->keyData[]. That way, _bt_first and _bt_checkkeys cannot fail to reach
* a required key due to some earlier nonrequired key getting in the way.
*
* Only call here when _bt_compare_scankey_args returned false at least once
* (otherwise, calling here will just waste cycles).
*/
static void
_bt_unmark_keys(IndexScanDesc scan, int *keyDataMap)
{
BTScanOpaque so = (BTScanOpaque) scan->opaque;
AttrNumber attno;
bool *unmarkikey;
int nunmark,
nunmarked,
nkept,
firsti;
ScanKey keepKeys,
unmarkKeys;
FmgrInfo *keepOrderProcs = NULL,
*unmarkOrderProcs = NULL;
bool haveReqEquals,
haveReqForward,
haveReqBackward;
/*
* Do an initial pass over so->keyData[] that determines which keys to
* keep as required. We expect so->keyData[] to still be in attribute
* order when we're called (though we don't expect any particular order
* among each attribute's keys).
*
* When both equality and inequality keys remain on a single attribute, we
* *must* make sure that exactly one of the equalities remains required.
* Any requiredness markings that we might leave on later keys/attributes
* are predicated on there being required = keys on all prior columns.
*/
unmarkikey = palloc0(so->numberOfKeys * sizeof(bool));
nunmark = 0;
/* Set things up for first key's attribute */
attno = so->keyData[0].sk_attno;
firsti = 0;
haveReqEquals = false;
haveReqForward = false;
haveReqBackward = false;
for (int i = 0; i < so->numberOfKeys; i++)
{
ScanKey origkey = &so->keyData[i];
if (origkey->sk_attno != attno)
{
/* Reset for next attribute */
attno = origkey->sk_attno;
firsti = i;
haveReqEquals = false;
haveReqForward = false;
haveReqBackward = false;
}
/* Equalities get priority over inequalities */
if (haveReqEquals)
{
/*
* We already found the first "=" key for this attribute. We've
* already decided that all its other keys will be unmarked.
*/
Assert(!(origkey->sk_flags & SK_SEARCHNULL));
unmarkikey[i] = true;
nunmark++;
continue;
}
else if ((origkey->sk_flags & SK_BT_REQFWD) &&
(origkey->sk_flags & SK_BT_REQBKWD))
{
/*
* Found the first "=" key for attno. All other attno keys will
* be unmarked.
*/
Assert(origkey->sk_strategy == BTEqualStrategyNumber);
haveReqEquals = true;
for (int j = firsti; j < i; j++)
{
/* Unmark any prior inequality keys on attno after all */
if (!unmarkikey[j])
{
unmarkikey[j] = true;
nunmark++;
}
}
continue;
}
/* Deal with inequalities next */
if ((origkey->sk_flags & SK_BT_REQFWD) && !haveReqForward)
{
haveReqForward = true;
continue;
}
else if ((origkey->sk_flags & SK_BT_REQBKWD) && !haveReqBackward)
{
haveReqBackward = true;
continue;
}
/*
* We have either a redundant inequality key that will be unmarked, or
* we have a key that wasn't marked required in the first place
*/
unmarkikey[i] = true;
nunmark++;
}
/* Should only be called when _bt_compare_scankey_args reported failure */
Assert(nunmark > 0);
/*
* Next, allocate temp arrays: one for required keys that'll remain
* required, the other for all remaining keys
*/
unmarkKeys = palloc(nunmark * sizeof(ScanKeyData));
keepKeys = palloc((so->numberOfKeys - nunmark) * sizeof(ScanKeyData));
nunmarked = 0;
nkept = 0;
if (so->numArrayKeys)
{
unmarkOrderProcs = palloc(nunmark * sizeof(FmgrInfo));
keepOrderProcs = palloc((so->numberOfKeys - nunmark) * sizeof(FmgrInfo));
}
/*
* Next, copy the contents of so->keyData[] into the appropriate temp
* array.
*
* Scans with = array keys need us to maintain invariants around the order
* of so->orderProcs[] and so->arrayKeys[] relative to so->keyData[]. See
* _bt_preprocess_array_keys_final for a full explanation.
*/
for (int i = 0; i < so->numberOfKeys; i++)
{
ScanKey origkey = &so->keyData[i];
ScanKey unmark;
if (!unmarkikey[i])
{
/*
* Key gets to keep its original requiredness markings.
*
* Key will stay in its original position, unless we're going to
* unmark an earlier key (in which case this key gets moved back).
*/
memcpy(keepKeys + nkept, origkey, sizeof(ScanKeyData));
if (so->numArrayKeys)
{
keyDataMap[i] = nkept;
memcpy(keepOrderProcs + nkept, &so->orderProcs[i],
sizeof(FmgrInfo));
}
nkept++;
continue;
}
/*
* Key will be unmarked as needed, and moved to the end of the array,
* next to other keys that will become (or always were) nonrequired
*/
unmark = unmarkKeys + nunmarked;
memcpy(unmark, origkey, sizeof(ScanKeyData));
if (so->numArrayKeys)
{
keyDataMap[i] = (so->numberOfKeys - nunmark) + nunmarked;
memcpy(&unmarkOrderProcs[nunmarked], &so->orderProcs[i],
sizeof(FmgrInfo));
}
/*
* Preprocessing only generates skip arrays when it knows that they'll
* be the only required = key on the attr. We'll never unmark them.
*/
Assert(!(unmark->sk_flags & SK_BT_SKIP));
/*
* Also shouldn't have to unmark an IS NULL or an IS NOT NULL key.
* They aren't cross-type, so an incomplete opfamily can't matter.
*/
Assert(!(unmark->sk_flags & SK_ISNULL) ||
!(unmark->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)));
/* Clear requiredness flags on redundant key (and on any subkeys) */
unmark->sk_flags &= ~(SK_BT_REQFWD | SK_BT_REQBKWD);
if (unmark->sk_flags & SK_ROW_HEADER)
{
ScanKey subkey = (ScanKey) DatumGetPointer(unmark->sk_argument);
Assert(subkey->sk_strategy == unmark->sk_strategy);
for (;;)
{
Assert(subkey->sk_flags & SK_ROW_MEMBER);
subkey->sk_flags &= ~(SK_BT_REQFWD | SK_BT_REQBKWD);
if (subkey->sk_flags & SK_ROW_END)
break;
subkey++;
}
}
nunmarked++;
}
/* Copy both temp arrays back into so->keyData[] to reorder */
Assert(nkept == so->numberOfKeys - nunmark);
Assert(nunmarked == nunmark);
memcpy(so->keyData, keepKeys, sizeof(ScanKeyData) * nkept);
memcpy(so->keyData + nkept, unmarkKeys, sizeof(ScanKeyData) * nunmarked);
/* Done with temp arrays */
pfree(unmarkikey);
pfree(keepKeys);
pfree(unmarkKeys);
/*
* Now copy so->orderProcs[] temp entries needed by scans with = array
* keys back (just like with the so->keyData[] temp arrays)
*/
if (so->numArrayKeys)
{
memcpy(so->orderProcs, keepOrderProcs, sizeof(FmgrInfo) * nkept);
memcpy(so->orderProcs + nkept, unmarkOrderProcs,
sizeof(FmgrInfo) * nunmarked);
/* Also fix-up array->scan_key references */
for (int arridx = 0; arridx < so->numArrayKeys; arridx++)
{
BTArrayKeyInfo *array = &so->arrayKeys[arridx];
array->scan_key = keyDataMap[array->scan_key];
}
/*
* Sort so->arrayKeys[] based on its new BTArrayKeyInfo.scan_key
* offsets, so that its order matches so->keyData[] order as expected
*/
qsort(so->arrayKeys, so->numArrayKeys, sizeof(BTArrayKeyInfo),
_bt_reorder_array_cmp);
/* Done with temp arrays */
pfree(unmarkOrderProcs);
pfree(keepOrderProcs);
}
}
/*
* qsort comparator for reordering so->arrayKeys[] BTArrayKeyInfo entries
*/
static int
_bt_reorder_array_cmp(const void *a, const void *b)
{
BTArrayKeyInfo *arraya = (BTArrayKeyInfo *) a;
BTArrayKeyInfo *arrayb = (BTArrayKeyInfo *) b;
return pg_cmp_s32(arraya->scan_key, arrayb->scan_key);
}
/*
 * _bt_preprocess_array_keys() -- Preprocess SK_SEARCHARRAY scan keys
 *


@@ -960,46 +960,51 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
	/*----------
	 * Examine the scan keys to discover where we need to start the scan.
+	 * The selected scan keys (at most one per index column) are remembered by
+	 * storing their addresses into the local startKeys[] array.  The final
+	 * startKeys[] entry's strategy is set in strat_total.  (Actually, there
+	 * are a couple of cases where we force a less/more restrictive strategy.)
	 *
-	 * We want to identify the keys that can be used as starting boundaries;
-	 * these are =, >, or >= keys for a forward scan or =, <, <= keys for
-	 * a backwards scan.  We can use keys for multiple attributes so long as
-	 * the prior attributes had only =, >= (resp. =, <=) keys.  Once we accept
-	 * a > or < boundary or find an attribute with no boundary (which can be
-	 * thought of as the same as "> -infinity"), we can't use keys for any
-	 * attributes to its right, because it would break our simplistic notion
-	 * of what initial positioning strategy to use.
+	 * We must use the key that was marked required (in the direction opposite
+	 * our own scan's) during preprocessing.  Each index attribute can only
+	 * have one such required key.  In general, the keys that we use to find
+	 * an initial position when scanning forwards are the same keys that end
+	 * the scan on the leaf level when scanning backwards (and vice-versa).
	 *
	 * When the scan keys include cross-type operators, _bt_preprocess_keys
-	 * may not be able to eliminate redundant keys; in such cases we will
-	 * arbitrarily pick a usable one for each attribute.  This is correct
-	 * but possibly not optimal behavior.  (For example, with keys like
-	 * "x >= 4 AND x >= 5" we would elect to scan starting at x=4 when
-	 * x=5 would be more efficient.)  Since the situation only arises given
-	 * a poorly-worded query plus an incomplete opfamily, live with it.
+	 * may not be able to eliminate redundant keys; in such cases it will
+	 * arbitrarily pick a usable key for each attribute (and scan direction),
+	 * ensuring that there is no more than one key required in each direction.
+	 * We stop considering further keys once we reach the first nonrequired
+	 * key (which must come after all required keys), so this can't affect us.
	 *
-	 * When both equality and inequality keys appear for a single attribute
-	 * (again, only possible when cross-type operators appear), we *must*
-	 * select one of the equality keys for the starting point, because
-	 * _bt_checkkeys() will stop the scan as soon as an equality qual fails.
-	 * For example, if we have keys like "x >= 4 AND x = 10" and we elect to
-	 * start at x=4, we will fail and stop before reaching x=10.  If multiple
-	 * equality quals survive preprocessing, however, it doesn't matter which
-	 * one we use --- by definition, they are either redundant or
-	 * contradictory.
+	 * The required keys that we use as starting boundaries have to be =, >,
+	 * or >= keys for a forward scan or =, <, <= keys for a backwards scan.
+	 * We can use keys for multiple attributes so long as the prior attributes
+	 * had only =, >= (resp. =, <=) keys.  These rules are very similar to the
+	 * rules that preprocessing used to determine which keys to mark required.
+	 * We cannot always use every required key as a positioning key, though.
+	 * Skip arrays necessitate independently applying our own rules here.
+	 * Skip arrays are always generally considered = array keys, but we'll
+	 * nevertheless treat them as inequalities at certain points of the scan.
+	 * When that happens, it _might_ have implications for the number of
+	 * required keys that we can safely use for initial positioning purposes.
	 *
-	 * In practice we rarely see any "attribute boundary key gaps" here.
-	 * Preprocessing can usually backfill skip array keys for any attributes
-	 * that were omitted from the original scan->keyData[] input keys.  All
-	 * array keys are always considered = keys, but we'll sometimes need to
-	 * treat the current key value as if we were using an inequality strategy.
-	 * This happens with range skip arrays, which store inequality keys in the
-	 * array's low_compare/high_compare fields (used to find the first/last
-	 * set of matches, when = key will lack a usable sk_argument value).
-	 * These are always preferred over any redundant "standard" inequality
-	 * keys on the same column (per the usual rule about preferring = keys).
-	 * Note also that any column with an = skip array key can never have an
-	 * additional, contradictory = key.
+	 * For example, a forward scan with a skip array on its leading attribute
+	 * (with no low_compare/high_compare) will have at least two required scan
+	 * keys, but we won't use any of them as boundary keys during the scan's
+	 * initial call here.  Our positioning key during the first call here can
+	 * be thought of as representing "> -infinity".  Similarly, if such a skip
+	 * array's low_compare is "a > 'foo'", then we position using "a > 'foo'"
+	 * during the scan's initial call here; a lower-order key such as "b = 42"
+	 * can't be used until the "a" array advances beyond MINVAL/low_compare.
+	 *
+	 * On the other hand, if such a skip array's low_compare was "a >= 'foo'",
+	 * then we _can_ use "a >= 'foo' AND b = 42" during the initial call here.
+	 * A subsequent call here might have us use "a = 'fop' AND b = 42".  Note
+	 * that we treat = and >= as equivalent when scanning forwards (just as we
+	 * treat = and <= as equivalent when scanning backwards).  We effectively
+	 * do the same thing (though with a distinct "a" element/value) each time.
	 *
	 * All keys (with the exception of SK_SEARCHNULL keys and SK_BT_SKIP
	 * array keys whose array is "null_elem=true") imply a NOT NULL qualifier.
@@ -1011,21 +1016,20 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
	 * traversing a lot of null entries at the start of the scan.
	 *
	 * In this loop, row-comparison keys are treated the same as keys on their
-	 * first (leftmost) columns.  We'll add on lower-order columns of the row
-	 * comparison below, if possible.
+	 * first (leftmost) columns.  We'll add all lower-order columns of the row
+	 * comparison that were marked required during preprocessing below.
	 *
-	 * The selected scan keys (at most one per index column) are remembered by
-	 * storing their addresses into the local startKeys[] array.
-	 *
-	 * _bt_checkkeys/_bt_advance_array_keys decide whether and when to start
-	 * the next primitive index scan (for scans with array keys) based in part
-	 * on an understanding of how it'll enable us to reposition the scan.
-	 * They're directly aware of how we'll sometimes cons up an explicit
-	 * SK_SEARCHNOTNULL key.  They'll even end primitive scans by applying a
-	 * symmetric "deduce NOT NULL" rule of their own.  This allows top-level
-	 * scans to skip large groups of NULLs through repeated deductions about
-	 * key strictness (for a required inequality key) and whether NULLs in the
-	 * key's index column are stored last or first (relative to non-NULLs).
+	 * _bt_advance_array_keys needs to know exactly how we'll reposition the
+	 * scan (should it opt to schedule another primitive index scan).  It is
+	 * critical that primscans only be scheduled when they'll definitely make
+	 * some useful progress.  _bt_advance_array_keys does this by calling
+	 * _bt_checkkeys routines that report whether a tuple is past the end of
+	 * matches for the scan's keys (given the scan's current array elements).
+	 * If the page's final tuple is "after the end of matches" for a scan that
+	 * uses the *opposite* scan direction, then it must follow that it's also
+	 * "before the start of matches" for the actual current scan direction.
+	 * It is therefore essential that all of our initial positioning rules are
+	 * symmetric with _bt_checkkeys's corresponding continuescan=false rule.
	 * If you update anything here, _bt_checkkeys/_bt_advance_array_keys might
	 * need to be kept in sync.
	 *----------
@@ -1034,18 +1038,17 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
	if (so->numberOfKeys > 0)
	{
		AttrNumber	curattr;
-		ScanKey		chosen;
+		ScanKey		bkey;
		ScanKey		impliesNN;
		ScanKey		cur;

		/*
-		 * chosen is the so-far-chosen key for the current attribute, if any.
-		 * We don't cast the decision in stone until we reach keys for the
-		 * next attribute.
+		 * bkey will be set to the key that preprocessing left behind as the
+		 * boundary key for this attribute, in this scan direction (if any)
		 */
		cur = so->keyData;
		curattr = 1;
-		chosen = NULL;
+		bkey = NULL;
		/* Also remember any scankey that implies a NOT NULL constraint */
		impliesNN = NULL;
@@ -1058,23 +1061,29 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
		{
			if (i >= so->numberOfKeys || cur->sk_attno != curattr)
			{
+				/* Done looking for the curattr boundary key */
+				Assert(bkey == NULL ||
+					   (bkey->sk_attno == curattr &&
+						(bkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD))));
+				Assert(impliesNN == NULL ||
+					   (impliesNN->sk_attno == curattr &&
+						(impliesNN->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD))));
+
				/*
-				 * Done looking at keys for curattr.
-				 *
				 * If this is a scan key for a skip array whose current
				 * element is MINVAL, choose low_compare (when scanning
				 * backwards it'll be MAXVAL, and we'll choose high_compare).
				 *
-				 * Note: if the array's low_compare key makes 'chosen' NULL,
+				 * Note: if the array's low_compare key makes 'bkey' NULL,
				 * then we behave as if the array's first element is -inf,
				 * except when !array->null_elem implies a usable NOT NULL
				 * constraint.
				 */
-				if (chosen != NULL &&
-					(chosen->sk_flags & (SK_BT_MINVAL | SK_BT_MAXVAL)))
+				if (bkey != NULL &&
+					(bkey->sk_flags & (SK_BT_MINVAL | SK_BT_MAXVAL)))
				{
-					int			ikey = chosen - so->keyData;
-					ScanKey		skipequalitykey = chosen;
+					int			ikey = bkey - so->keyData;
+					ScanKey		skipequalitykey = bkey;
					BTArrayKeyInfo *array = NULL;

					for (int arridx = 0; arridx < so->numArrayKeys; arridx++)
@@ -1087,35 +1096,35 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
					if (ScanDirectionIsForward(dir))
					{
						Assert(!(skipequalitykey->sk_flags & SK_BT_MAXVAL));
-						chosen = array->low_compare;
+						bkey = array->low_compare;
					}
					else
					{
						Assert(!(skipequalitykey->sk_flags & SK_BT_MINVAL));
-						chosen = array->high_compare;
+						bkey = array->high_compare;
					}

-					Assert(chosen == NULL ||
-						   chosen->sk_attno == skipequalitykey->sk_attno);
+					Assert(bkey == NULL ||
+						   bkey->sk_attno == skipequalitykey->sk_attno);

					if (!array->null_elem)
						impliesNN = skipequalitykey;
					else
-						Assert(chosen == NULL && impliesNN == NULL);
+						Assert(bkey == NULL && impliesNN == NULL);
				}

				/*
				 * If we didn't find a usable boundary key, see if we can
				 * deduce a NOT NULL key
				 */
-				if (chosen == NULL && impliesNN != NULL &&
+				if (bkey == NULL && impliesNN != NULL &&
					((impliesNN->sk_flags & SK_BT_NULLS_FIRST) ?
					 ScanDirectionIsForward(dir) :
					 ScanDirectionIsBackward(dir)))
				{
					/* Yes, so build the key in notnullkeys[keysz] */
-					chosen = &notnullkeys[keysz];
-					ScanKeyEntryInitialize(chosen,
+					bkey = &notnullkeys[keysz];
+					ScanKeyEntryInitialize(bkey,
										   (SK_SEARCHNOTNULL | SK_ISNULL |
											(impliesNN->sk_flags &
											 (SK_BT_DESC | SK_BT_NULLS_FIRST))),
@@ -1130,12 +1139,12 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
				}

				/*
-				 * If we still didn't find a usable boundary key, quit; else
-				 * save the boundary key pointer in startKeys.
+				 * If preprocessing didn't leave a usable boundary key, quit;
+				 * else save the boundary key pointer in startKeys[]
				 */
-				if (chosen == NULL)
+				if (bkey == NULL)
					break;
-				startKeys[keysz++] = chosen;
+				startKeys[keysz++] = bkey;

				/*
				 * We can only consider adding more boundary keys when the one
@@ -1143,7 +1152,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
				 * (during backwards scans we can only do so when the key that
				 * we just added to startKeys[] uses the = or <= strategy)
				 */
-				strat_total = chosen->sk_strategy;
+				strat_total = bkey->sk_strategy;
				if (strat_total == BTGreaterStrategyNumber ||
					strat_total == BTLessStrategyNumber)
					break;
@@ -1154,19 +1163,19 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
				 * make strat_total > or < (and stop adding boundary keys).
				 * This can only happen with opclasses that lack skip support.
				 */
-				if (chosen->sk_flags & (SK_BT_NEXT | SK_BT_PRIOR))
+				if (bkey->sk_flags & (SK_BT_NEXT | SK_BT_PRIOR))
				{
-					Assert(chosen->sk_flags & SK_BT_SKIP);
+					Assert(bkey->sk_flags & SK_BT_SKIP);
					Assert(strat_total == BTEqualStrategyNumber);

					if (ScanDirectionIsForward(dir))
					{
-						Assert(!(chosen->sk_flags & SK_BT_PRIOR));
+						Assert(!(bkey->sk_flags & SK_BT_PRIOR));
						strat_total = BTGreaterStrategyNumber;
					}
					else
					{
-						Assert(!(chosen->sk_flags & SK_BT_NEXT));
+						Assert(!(bkey->sk_flags & SK_BT_NEXT));
						strat_total = BTLessStrategyNumber;
					}
@@ -1180,24 +1189,30 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
				/*
				 * Done if that was the last scan key output by preprocessing.
-				 * Also done if there is a gap index attribute that lacks a
-				 * usable key (only possible when preprocessing was unable to
-				 * generate a skip array key to "fill in the gap").
+				 * Also done if we've now examined all keys marked required.
				 */
				if (i >= so->numberOfKeys ||
-					cur->sk_attno != curattr + 1)
+					!(cur->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)))
					break;

				/*
				 * Reset for next attr.
				 */
+				Assert(cur->sk_attno == curattr + 1);
				curattr = cur->sk_attno;
-				chosen = NULL;
+				bkey = NULL;
				impliesNN = NULL;
			}

			/*
-			 * Can we use this key as a starting boundary for this attr?
+			 * If we've located the starting boundary key for curattr, we have
+			 * no interest in curattr's other required key
+			 */
+			if (bkey != NULL)
+				continue;
+
+			/*
+			 * Is this key the starting boundary key for curattr?
			 *
			 * If not, does it imply a NOT NULL constraint?  (Because
			 * SK_SEARCHNULL keys are always assigned BTEqualStrategyNumber,
@@ -1207,27 +1222,20 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
			{
				case BTLessStrategyNumber:
				case BTLessEqualStrategyNumber:
-					if (chosen == NULL)
-					{
-						if (ScanDirectionIsBackward(dir))
-							chosen = cur;
-						else
-							impliesNN = cur;
-					}
+					if (ScanDirectionIsBackward(dir))
+						bkey = cur;
+					else if (impliesNN == NULL)
+						impliesNN = cur;
					break;
				case BTEqualStrategyNumber:
-					/* override any non-equality choice */
-					chosen = cur;
+					bkey = cur;
					break;
				case BTGreaterEqualStrategyNumber:
				case BTGreaterStrategyNumber:
-					if (chosen == NULL)
-					{
-						if (ScanDirectionIsForward(dir))
-							chosen = cur;
-						else
-							impliesNN = cur;
-					}
+					if (ScanDirectionIsForward(dir))
+						bkey = cur;
+					else if (impliesNN == NULL)
+						impliesNN = cur;
					break;
			}
		}
@@ -1253,16 +1261,18 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
	Assert(keysz <= INDEX_MAX_KEYS);
	for (int i = 0; i < keysz; i++)
	{
-		ScanKey		cur = startKeys[i];
+		ScanKey		bkey = startKeys[i];

-		Assert(cur->sk_attno == i + 1);
+		Assert(bkey->sk_attno == i + 1);

-		if (cur->sk_flags & SK_ROW_HEADER)
+		if (bkey->sk_flags & SK_ROW_HEADER)
		{
			/*
			 * Row comparison header: look to the first row member instead
			 */
-			ScanKey		subkey = (ScanKey) DatumGetPointer(cur->sk_argument);
+			ScanKey		subkey = (ScanKey) DatumGetPointer(bkey->sk_argument);
+			bool		loosen_strat = false,
+						tighten_strat = false;

			/*
			 * Cannot be a NULL in the first row member: _bt_preprocess_keys
@@ -1270,9 +1280,18 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
			 * ever getting this far
			 */
			Assert(subkey->sk_flags & SK_ROW_MEMBER);
-			Assert(subkey->sk_attno == cur->sk_attno);
+			Assert(subkey->sk_attno == bkey->sk_attno);
			Assert(!(subkey->sk_flags & SK_ISNULL));

+			/*
+			 * This is either a > or >= key (during backwards scans it is
+			 * either < or <=) that was marked required during preprocessing.
+			 * Later so->keyData[] keys can't have been marked required, so
+			 * our row compare header key must be the final startKeys[] entry.
+			 */
+			Assert(subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD));
+			Assert(i == keysz - 1);
+
			/*
			 * The member scankeys are already in insertion format (ie, they
			 * have sk_func = 3-way-comparison function)
@ -1280,112 +1299,141 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
memcpy(inskey.scankeys + i, subkey, sizeof(ScanKeyData)); memcpy(inskey.scankeys + i, subkey, sizeof(ScanKeyData));
/* /*
* If the row comparison is the last positioning key we accepted, * Now look to later row compare members.
* try to add additional keys from the lower-order row members. *
* (If we accepted independent conditions on additional index * If there's an "index attribute gap" between two row compare
* columns, we use those instead --- doesn't seem worth trying to * members, the second member won't have been marked required, and
* determine which is more restrictive.) Note that this is OK * so can't be used as a starting boundary key here. The part of
* even if the row comparison is of ">" or "<" type, because the * the row comparison that we do still use has to be treated as a
* condition applied to all but the last row member is effectively * ">=" or "<=" condition. For example, a qual "(a, c) > (1, 42)"
* ">=" or "<=", and so the extra keys don't break the positioning * with an omitted intervening index attribute "b" will use an
* scheme. But, by the same token, if we aren't able to use all * insertion scan key "a >= 1". Even the first "a = 1" tuple on
* the row members, then the part of the row comparison that we * the leaf level might satisfy the row compare qual.
* did use has to be treated as just a ">=" or "<=" condition, and *
* so we'd better adjust strat_total accordingly. * We're able to use a _more_ restrictive strategy when we reach a
* NULL row compare member, since they're always unsatisfiable.
* For example, a qual "(a, b, c) >= (1, NULL, 77)" will use an
* insertion scan key "a > 1". All tuples where "a = 1" cannot
* possibly satisfy the row compare qual, so this is safe.
*/ */
if (i == keysz - 1) Assert(!(subkey->sk_flags & SK_ROW_END));
for (;;)
{ {
bool used_all_subkeys = false; subkey++;
Assert(subkey->sk_flags & SK_ROW_MEMBER);
Assert(!(subkey->sk_flags & SK_ROW_END)); if (subkey->sk_flags & SK_ISNULL)
for (;;)
{ {
subkey++; /*
Assert(subkey->sk_flags & SK_ROW_MEMBER); * NULL member key, can only use earlier keys.
if (subkey->sk_attno != keysz + 1) *
break; /* out-of-sequence, can't use it */ * We deliberately avoid checking if this key is marked
if (subkey->sk_strategy != cur->sk_strategy) * required. All earlier keys are required, and this key
break; /* wrong direction, can't use it */ * is unsatisfiable either way, so we can't miss anything.
if (subkey->sk_flags & SK_ISNULL) */
break; /* can't use null keys */ tighten_strat = true;
Assert(keysz < INDEX_MAX_KEYS); break;
memcpy(inskey.scankeys + keysz, subkey,
sizeof(ScanKeyData));
keysz++;
if (subkey->sk_flags & SK_ROW_END)
{
used_all_subkeys = true;
break;
}
} }
if (!used_all_subkeys)
if (!(subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)))
{ {
switch (strat_total) /* nonrequired member key, can only use earlier keys */
{ loosen_strat = true;
case BTLessStrategyNumber: break;
strat_total = BTLessEqualStrategyNumber;
break;
case BTGreaterStrategyNumber:
strat_total = BTGreaterEqualStrategyNumber;
break;
}
} }
break; /* done with outer loop */
Assert(subkey->sk_attno == keysz + 1);
Assert(subkey->sk_strategy == bkey->sk_strategy);
Assert(keysz < INDEX_MAX_KEYS);
memcpy(inskey.scankeys + keysz, subkey,
sizeof(ScanKeyData));
keysz++;
if (subkey->sk_flags & SK_ROW_END)
break;
} }
Assert(!(loosen_strat && tighten_strat));
if (loosen_strat)
{
/* Use less restrictive strategy (and fewer member keys) */
switch (strat_total)
{
case BTLessStrategyNumber:
strat_total = BTLessEqualStrategyNumber;
break;
case BTGreaterStrategyNumber:
strat_total = BTGreaterEqualStrategyNumber;
break;
}
}
if (tighten_strat)
{
/* Use more restrictive strategy (and fewer member keys) */
switch (strat_total)
{
case BTLessEqualStrategyNumber:
strat_total = BTLessStrategyNumber;
break;
case BTGreaterEqualStrategyNumber:
strat_total = BTGreaterStrategyNumber;
break;
}
}
/* done adding to inskey (row comparison keys always come last) */
break;
}
/*
* Ordinary comparison key/search-style key.
*
* Transform the search-style scan key to an insertion scan key by
* replacing the sk_func with the appropriate btree 3-way-comparison
* function.
*
* If scankey operator is not a cross-type comparison, we can use the
* cached comparison function; otherwise gotta look it up in the
* catalogs. (That can't lead to infinite recursion, since no
* indexscan initiated by syscache lookup will use cross-data-type
* operators.)
*
* We support the convention that sk_subtype == InvalidOid means the
* opclass input type; this hack simplifies life for ScanKeyInit().
*/
if (bkey->sk_subtype == rel->rd_opcintype[i] ||
bkey->sk_subtype == InvalidOid)
{
FmgrInfo *procinfo;
procinfo = index_getprocinfo(rel, bkey->sk_attno, BTORDER_PROC);
ScanKeyEntryInitializeWithInfo(inskey.scankeys + i,
bkey->sk_flags,
bkey->sk_attno,
InvalidStrategy,
bkey->sk_subtype,
bkey->sk_collation,
procinfo,
bkey->sk_argument);
} }
else else
{ {
/* RegProcedure cmp_proc;
* Ordinary comparison key. Transform the search-style scan key
* to an insertion scan key by replacing the sk_func with the
* appropriate btree comparison function.
*
* If scankey operator is not a cross-type comparison, we can use
* the cached comparison function; otherwise gotta look it up in
* the catalogs. (That can't lead to infinite recursion, since no
* indexscan initiated by syscache lookup will use cross-data-type
* operators.)
*
* We support the convention that sk_subtype == InvalidOid means
* the opclass input type; this is a hack to simplify life for
* ScanKeyInit().
*/
if (cur->sk_subtype == rel->rd_opcintype[i] ||
cur->sk_subtype == InvalidOid)
{
FmgrInfo *procinfo;
procinfo = index_getprocinfo(rel, cur->sk_attno, BTORDER_PROC); cmp_proc = get_opfamily_proc(rel->rd_opfamily[i],
ScanKeyEntryInitializeWithInfo(inskey.scankeys + i, rel->rd_opcintype[i],
cur->sk_flags, bkey->sk_subtype, BTORDER_PROC);
cur->sk_attno, if (!RegProcedureIsValid(cmp_proc))
InvalidStrategy, elog(ERROR, "missing support function %d(%u,%u) for attribute %d of index \"%s\"",
cur->sk_subtype, BTORDER_PROC, rel->rd_opcintype[i], bkey->sk_subtype,
cur->sk_collation, bkey->sk_attno, RelationGetRelationName(rel));
procinfo, ScanKeyEntryInitialize(inskey.scankeys + i,
cur->sk_argument); bkey->sk_flags,
} bkey->sk_attno,
else InvalidStrategy,
{ bkey->sk_subtype,
RegProcedure cmp_proc; bkey->sk_collation,
cmp_proc,
cmp_proc = get_opfamily_proc(rel->rd_opfamily[i], bkey->sk_argument);
rel->rd_opcintype[i],
cur->sk_subtype,
BTORDER_PROC);
if (!RegProcedureIsValid(cmp_proc))
elog(ERROR, "missing support function %d(%u,%u) for attribute %d of index \"%s\"",
BTORDER_PROC, rel->rd_opcintype[i], cur->sk_subtype,
cur->sk_attno, RelationGetRelationName(rel));
ScanKeyEntryInitialize(inskey.scankeys + i,
cur->sk_flags,
cur->sk_attno,
InvalidStrategy,
cur->sk_subtype,
cur->sk_collation,
cmp_proc,
cur->sk_argument);
}
} }
} }
@ -1474,6 +1522,8 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
if (!BufferIsValid(so->currPos.buf)) if (!BufferIsValid(so->currPos.buf))
{ {
Assert(!so->needPrimScan);
/* /*
* We only get here if the index is completely empty. Lock relation * We only get here if the index is completely empty. Lock relation
* because nothing finer to lock exists. Without a buffer lock, it's * because nothing finer to lock exists. Without a buffer lock, it's
@ -1492,7 +1542,6 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
if (!BufferIsValid(so->currPos.buf)) if (!BufferIsValid(so->currPos.buf))
{ {
Assert(!so->needPrimScan);
_bt_parallel_done(scan); _bt_parallel_done(scan);
return false; return false;
} }
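The strategy adjustments above are easy to exercise from SQL. The following is only an illustrative sketch: the table, index, and data are hypothetical (not part of this change), and whether the planner actually uses the index depends on statistics and costs.

-- Hypothetical objects, for illustration only
CREATE TABLE t (a int, b int, c int);
CREATE INDEX t_a_b_c_idx ON t (a, b, c);
-- The NULL second member makes every "a = 1" tuple unsatisfiable, so the
-- initial positioning key can be tightened from "a >= 1" to "a > 1":
SELECT * FROM t WHERE (a, b, c) >= (1, NULL, 77) ORDER BY a, b, c;
-- The omitted "b" member means the usable part of the row compare has to be
-- loosened to "a >= 1", even though the row compare itself is strict:
SELECT * FROM t WHERE (a, c) > (1, 42) ORDER BY a, b, c;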

View File

@@ -44,7 +44,6 @@ static bool _bt_array_decrement(Relation rel, ScanKey skey, BTArrayKeyInfo *arra
 static bool _bt_array_increment(Relation rel, ScanKey skey, BTArrayKeyInfo *array);
 static bool _bt_advance_array_keys_increment(IndexScanDesc scan, ScanDirection dir,
 											 bool *skip_array_set);
-static void _bt_rewind_nonrequired_arrays(IndexScanDesc scan, ScanDirection dir);
 static bool _bt_tuple_before_array_skeys(IndexScanDesc scan, ScanDirection dir,
 										 IndexTuple tuple, TupleDesc tupdesc, int tupnatts,
 										 bool readpagetup, int sktrig, bool *scanBehind);
@@ -52,7 +51,6 @@ static bool _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate,
 								   IndexTuple tuple, int tupnatts, TupleDesc tupdesc,
 								   int sktrig, bool sktrig_required);
 #ifdef USE_ASSERT_CHECKING
-static bool _bt_verify_arrays_bt_first(IndexScanDesc scan, ScanDirection dir);
 static bool _bt_verify_keys_with_arraykeys(IndexScanDesc scan);
 #endif
 static bool _bt_oppodir_checkkeys(IndexScanDesc scan, ScanDirection dir,
@@ -1034,73 +1032,6 @@ _bt_advance_array_keys_increment(IndexScanDesc scan, ScanDirection dir,
 	return false;
 }

-/*
- * _bt_rewind_nonrequired_arrays() -- Rewind SAOP arrays not marked required
- *
- * Called when _bt_advance_array_keys decides to start a new primitive index
- * scan on the basis of the current scan position being before the position
- * that _bt_first is capable of repositioning the scan to by applying an
- * inequality operator required in the opposite-to-scan direction only.
- *
- * Although equality strategy scan keys (for both arrays and non-arrays alike)
- * are either marked required in both directions or in neither direction,
- * there is a sense in which non-required arrays behave like required arrays.
- * With a qual such as "WHERE a IN (100, 200) AND b >= 3 AND c IN (5, 6, 7)",
- * the scan key on "c" is non-required, but nevertheless enables positioning
- * the scan at the first tuple >= "(100, 3, 5)" on the leaf level during the
- * first descent of the tree by _bt_first.  Later on, there could also be a
- * second descent, that places the scan right before tuples >= "(200, 3, 5)".
- * _bt_first must never be allowed to build an insertion scan key whose "c"
- * entry is set to a value other than 5, the "c" array's first element/value.
- * (Actually, it's the first in the current scan direction.  This example uses
- * a forward scan.)
- *
- * Calling here resets the array scan key elements for the scan's non-required
- * arrays.  This is strictly necessary for correctness in a subset of cases
- * involving "required in opposite direction"-triggered primitive index scans.
- * Not all callers are at risk of _bt_first using a non-required array like
- * this, but advancement always resets the arrays when another primitive scan
- * is scheduled, just to keep things simple.  Array advancement even makes
- * sure to reset non-required arrays during scans that have no inequalities.
- * (Advancement still won't call here when there are no inequalities, though
- * that's just because it's all handled indirectly instead.)
- *
- * Note: _bt_verify_arrays_bt_first is called by an assertion to enforce that
- * everybody got this right.
- *
- * Note: In practice almost all SAOP arrays are marked required during
- * preprocessing (if necessary by generating skip arrays).  It is hardly ever
- * truly necessary to call here, but consistently doing so is simpler.
- */
-static void
-_bt_rewind_nonrequired_arrays(IndexScanDesc scan, ScanDirection dir)
-{
-	Relation	rel = scan->indexRelation;
-	BTScanOpaque so = (BTScanOpaque) scan->opaque;
-	int			arrayidx = 0;
-
-	for (int ikey = 0; ikey < so->numberOfKeys; ikey++)
-	{
-		ScanKey		cur = so->keyData + ikey;
-		BTArrayKeyInfo *array = NULL;
-
-		if (!(cur->sk_flags & SK_SEARCHARRAY) ||
-			cur->sk_strategy != BTEqualStrategyNumber)
-			continue;
-
-		array = &so->arrayKeys[arrayidx++];
-		Assert(array->scan_key == ikey);
-
-		if ((cur->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)))
-			continue;
-
-		Assert(array->num_elems != -1); /* No non-required skip arrays */
-
-		_bt_array_set_low_or_high(rel, cur, array,
-								  ScanDirectionIsForward(dir));
-	}
-}
-
 /*
  * _bt_tuple_before_array_skeys() -- too early to advance required arrays?
  *
@@ -1380,8 +1311,6 @@ _bt_start_prim_scan(IndexScanDesc scan, ScanDirection dir)
 	 */
 	if (so->needPrimScan)
 	{
-		Assert(_bt_verify_arrays_bt_first(scan, dir));
-
 		/*
 		 * Flag was set -- must call _bt_first again, which will reset the
 		 * scan's needPrimScan flag
@@ -2007,14 +1936,7 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate,
 	 */
 	else if (has_required_opposite_direction_only && pstate->finaltup &&
 			 unlikely(!_bt_oppodir_checkkeys(scan, dir, pstate->finaltup)))
-	{
-		/*
-		 * Make sure that any SAOP arrays that were not marked required by
-		 * preprocessing are reset to their first element for this direction
-		 */
-		_bt_rewind_nonrequired_arrays(scan, dir);
 		goto new_prim_scan;
-	}

 continue_scan:
@@ -2045,8 +1967,6 @@ continue_scan:
 	 */
 	so->oppositeDirCheck = has_required_opposite_direction_only;

-	_bt_rewind_nonrequired_arrays(scan, dir);
-
 	/*
 	 * skip by setting "look ahead" mechanism's offnum for forwards scans
 	 * (backwards scans check scanBehind flag directly instead)
@@ -2142,48 +2062,6 @@ end_toplevel_scan:
 }

 #ifdef USE_ASSERT_CHECKING
-/*
- * Verify that the scan's qual state matches what we expect at the point that
- * _bt_start_prim_scan is about to start a just-scheduled new primitive scan.
- *
- * We enforce a rule against non-required array scan keys: they must start out
- * with whatever element is the first for the scan's current scan direction.
- * See _bt_rewind_nonrequired_arrays comments for an explanation.
- */
-static bool
-_bt_verify_arrays_bt_first(IndexScanDesc scan, ScanDirection dir)
-{
-	BTScanOpaque so = (BTScanOpaque) scan->opaque;
-	int			arrayidx = 0;
-
-	for (int ikey = 0; ikey < so->numberOfKeys; ikey++)
-	{
-		ScanKey		cur = so->keyData + ikey;
-		BTArrayKeyInfo *array = NULL;
-		int			first_elem_dir;
-
-		if (!(cur->sk_flags & SK_SEARCHARRAY) ||
-			cur->sk_strategy != BTEqualStrategyNumber)
-			continue;
-
-		array = &so->arrayKeys[arrayidx++];
-
-		if (((cur->sk_flags & SK_BT_REQFWD) && ScanDirectionIsForward(dir)) ||
-			((cur->sk_flags & SK_BT_REQBKWD) && ScanDirectionIsBackward(dir)))
-			continue;
-
-		if (ScanDirectionIsForward(dir))
-			first_elem_dir = 0;
-		else
-			first_elem_dir = array->num_elems - 1;
-
-		if (array->cur_elem != first_elem_dir)
-			return false;
-	}
-
-	return _bt_verify_keys_with_arraykeys(scan);
-}
-
 /*
  * Verify that the scan's "so->keyData[]" scan keys are in agreement with
  * its array key state
@@ -2194,6 +2072,7 @@ _bt_verify_keys_with_arraykeys(IndexScanDesc scan)
 	BTScanOpaque so = (BTScanOpaque) scan->opaque;
 	int			last_sk_attno = InvalidAttrNumber,
 				arrayidx = 0;
+	bool		nonrequiredseen = false;

 	if (!so->qual_ok)
 		return false;
@@ -2217,8 +2096,16 @@ _bt_verify_keys_with_arraykeys(IndexScanDesc scan)
 		if (array->num_elems != -1 &&
 			cur->sk_argument != array->elem_values[array->cur_elem])
 			return false;
-		if (last_sk_attno > cur->sk_attno)
-			return false;
+		if (cur->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD))
+		{
+			if (last_sk_attno > cur->sk_attno)
+				return false;
+			if (nonrequiredseen)
+				return false;
+		}
+		else
+			nonrequiredseen = true;
 		last_sk_attno = cur->sk_attno;
 	}
@@ -2551,37 +2438,12 @@ _bt_set_startikey(IndexScanDesc scan, BTReadPageState *pstate)
 		if (!(key->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)))
 		{
 			/* Scan key isn't marked required (corner case) */
-			Assert(!(key->sk_flags & SK_ROW_HEADER));
 			break;				/* unsafe */
 		}
 		if (key->sk_flags & SK_ROW_HEADER)
 		{
-			/*
-			 * RowCompare inequality.
-			 *
-			 * Only the first subkey from a RowCompare can ever be marked
-			 * required (that happens when the row header is marked required).
-			 * There is no simple, general way for us to transitively deduce
-			 * whether or not every tuple on the page satisfies a RowCompare
-			 * key based only on firsttup and lasttup -- so we just give up.
-			 */
-			if (!start_past_saop_eq && !so->skipScan)
-				break;			/* unsafe to go further */
-
-			/*
-			 * We have to be even more careful with RowCompares that come
-			 * after an array: we assume it's unsafe to even bypass the array.
-			 * Calling _bt_start_array_keys to recover the scan's arrays
-			 * following use of forcenonrequired mode isn't compatible with
-			 * _bt_check_rowcompare's continuescan=false behavior with NULL
-			 * row compare members.  _bt_advance_array_keys must not make a
-			 * decision on the basis of a key not being satisfied in the
-			 * opposite-to-scan direction until the scan reaches a leaf page
-			 * where the same key begins to be satisfied in scan direction.
-			 * The _bt_first !used_all_subkeys behavior makes this limitation
-			 * hard to work around some other way.
-			 */
-			return;				/* completely unsafe to set pstate.startikey */
+			/* RowCompare inequalities currently aren't supported */
+			break;				/* "unsafe" */
 		}
 		if (key->sk_strategy != BTEqualStrategyNumber)
 		{
@@ -3078,76 +2940,7 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
 		Assert(subkey->sk_flags & SK_ROW_MEMBER);

-		if (subkey->sk_attno > tupnatts)
-		{
-			/*
-			 * This attribute is truncated (must be high key).  The value for
-			 * this attribute in the first non-pivot tuple on the page to the
-			 * right could be any possible value.  Assume that truncated
-			 * attribute passes the qual.
-			 */
-			Assert(BTreeTupleIsPivot(tuple));
-			cmpresult = 0;
-			if (subkey->sk_flags & SK_ROW_END)
-				break;
-			subkey++;
-			continue;
-		}
-
-		datum = index_getattr(tuple,
-							  subkey->sk_attno,
-							  tupdesc,
-							  &isNull);
-
-		if (isNull)
-		{
-			if (forcenonrequired)
-			{
-				/* treating scan's keys as non-required */
-			}
-			else if (subkey->sk_flags & SK_BT_NULLS_FIRST)
-			{
-				/*
-				 * Since NULLs are sorted before non-NULLs, we know we have
-				 * reached the lower limit of the range of values for this
-				 * index attr.  On a backward scan, we can stop if this qual
-				 * is one of the "must match" subset.  We can stop regardless
-				 * of whether the qual is > or <, so long as it's required,
-				 * because it's not possible for any future tuples to pass. On
-				 * a forward scan, however, we must keep going, because we may
-				 * have initially positioned to the start of the index.
-				 * (_bt_advance_array_keys also relies on this behavior during
-				 * forward scans.)
-				 */
-				if ((subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) &&
-					ScanDirectionIsBackward(dir))
-					*continuescan = false;
-			}
-			else
-			{
-				/*
-				 * Since NULLs are sorted after non-NULLs, we know we have
-				 * reached the upper limit of the range of values for this
-				 * index attr.  On a forward scan, we can stop if this qual is
-				 * one of the "must match" subset.  We can stop regardless of
-				 * whether the qual is > or <, so long as it's required,
-				 * because it's not possible for any future tuples to pass. On
-				 * a backward scan, however, we must keep going, because we
-				 * may have initially positioned to the end of the index.
-				 * (_bt_advance_array_keys also relies on this behavior during
-				 * backward scans.)
-				 */
-				if ((subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) &&
-					ScanDirectionIsForward(dir))
-					*continuescan = false;
-			}
-
-			/*
-			 * In any case, this indextuple doesn't match the qual.
-			 */
-			return false;
-		}
-
+		/* When a NULL row member is compared, the row never matches */
 		if (subkey->sk_flags & SK_ISNULL)
 		{
 			/*
@@ -3172,6 +2965,114 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
 			return false;
 		}

+		if (subkey->sk_attno > tupnatts)
+		{
+			/*
+			 * This attribute is truncated (must be high key).  The value for
+			 * this attribute in the first non-pivot tuple on the page to the
+			 * right could be any possible value.  Assume that truncated
+			 * attribute passes the qual.
+			 */
+			Assert(BTreeTupleIsPivot(tuple));
+			return true;
+		}
+
+		datum = index_getattr(tuple,
+							  subkey->sk_attno,
+							  tupdesc,
+							  &isNull);
+
+		if (isNull)
+		{
+			int			reqflags;
+
+			if (forcenonrequired)
+			{
+				/* treating scan's keys as non-required */
+			}
+			else if (subkey->sk_flags & SK_BT_NULLS_FIRST)
+			{
+				/*
+				 * Since NULLs are sorted before non-NULLs, we know we have
+				 * reached the lower limit of the range of values for this
+				 * index attr.  On a backward scan, we can stop if this qual
+				 * is one of the "must match" subset.  However, on a forwards
+				 * scan, we must keep going, because we may have initially
+				 * positioned to the start of the index.
+				 *
+				 * All required NULLS FIRST > row members can use NULL tuple
+				 * values to end backwards scans, just like with other values.
+				 * A qual "WHERE (a, b, c) > (9, 42, 'foo')" can terminate a
+				 * backwards scan upon reaching the index's rightmost "a = 9"
+				 * tuple whose "b" column contains a NULL (if not sooner).
+				 * Since "b" is NULLS FIRST, we can treat its NULLs as "<" 42.
+				 */
+				reqflags = SK_BT_REQBKWD;
+
+				/*
+				 * When a most significant required NULLS FIRST < row compare
+				 * member sees NULL tuple values during a backwards scan, it
+				 * signals the end of matches for the whole row compare/scan.
+				 * A qual "WHERE (a, b, c) < (9, 42, 'foo')" will terminate a
+				 * backwards scan upon reaching the rightmost tuple whose "a"
+				 * column has a NULL.  The "a" NULL value is "<" 9, and yet
+				 * our < row compare will still end the scan.  (This isn't
+				 * safe with later/lower-order row members.  Notice that it
+				 * can only happen with an "a" NULL some time after the scan
+				 * completely stops needing to use its "b" and "c" members.)
+				 */
+				if (subkey == (ScanKey) DatumGetPointer(skey->sk_argument))
+					reqflags |= SK_BT_REQFWD;	/* safe, first row member */
+
+				if ((subkey->sk_flags & reqflags) &&
+					ScanDirectionIsBackward(dir))
+					*continuescan = false;
+			}
+			else
+			{
+				/*
+				 * Since NULLs are sorted after non-NULLs, we know we have
+				 * reached the upper limit of the range of values for this
+				 * index attr.  On a forward scan, we can stop if this qual is
+				 * one of the "must match" subset.  However, on a backward
+				 * scan, we must keep going, because we may have initially
+				 * positioned to the end of the index.
+				 *
+				 * All required NULLS LAST < row members can use NULL tuple
+				 * values to end forwards scans, just like with other values.
+				 * A qual "WHERE (a, b, c) < (9, 42, 'foo')" can terminate a
+				 * forwards scan upon reaching the index's leftmost "a = 9"
+				 * tuple whose "b" column contains a NULL (if not sooner).
+				 * Since "b" is NULLS LAST, we can treat its NULLs as ">" 42.
+				 */
+				reqflags = SK_BT_REQFWD;
+
+				/*
+				 * When a most significant required NULLS LAST > row compare
+				 * member sees NULL tuple values during a forwards scan, it
+				 * signals the end of matches for the whole row compare/scan.
+				 * A qual "WHERE (a, b, c) > (9, 42, 'foo')" will terminate a
+				 * forwards scan upon reaching the leftmost tuple whose "a"
+				 * column has a NULL.  The "a" NULL value is ">" 9, and yet
+				 * our > row compare will end the scan.  (This isn't safe with
+				 * later/lower-order row members.  Notice that it can only
+				 * happen with an "a" NULL some time after the scan completely
+				 * stops needing to use its "b" and "c" members.)
+				 */
+				if (subkey == (ScanKey) DatumGetPointer(skey->sk_argument))
+					reqflags |= SK_BT_REQBKWD;	/* safe, first row member */
+
+				if ((subkey->sk_flags & reqflags) &&
+					ScanDirectionIsForward(dir))
+					*continuescan = false;
+			}
+
+			/*
+			 * In any case, this indextuple doesn't match the qual.
+			 */
+			return false;
+		}
+
 		/* Perform the test --- three-way comparison not bool operator */
 		cmpresult = DatumGetInt32(FunctionCall2Coll(&subkey->sk_func,
 													subkey->sk_collation,
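The NULL-handling rules spelled out in the new _bt_check_rowcompare comments can be pictured with a small SQL sketch. Everything below is hypothetical (not part of this change), and the planner may well prefer a sequential scan on such a tiny table; it is meant only to show which tuple NULLs may end a scan and which merely fail the qual.

-- Hypothetical objects, for illustration only
CREATE TABLE rc (a int, b int, c text);
CREATE INDEX rc_idx ON rc (a, b, c);   -- "b" is NULLS LAST by default
INSERT INTO rc VALUES (9, 41, 'aaa'), (9, NULL, 'zzz'), (10, 1, 'bbb');
-- Forwards scan, required "<" row compare: the (9, NULL, 'zzz') tuple can
-- end the scan, because NULLS LAST lets its "b" NULL be treated as "> 42".
SELECT * FROM rc WHERE (a, b, c) < (9, 42, 'foo') ORDER BY a, b, c;
-- Forwards scan, required ">" row compare: the same lower-order NULL only
-- fails the qual; the scan continues and still returns (10, 1, 'bbb').
SELECT * FROM rc WHERE (a, b, c) > (9, 42, 'foo') ORDER BY a, b, c;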

View File

@@ -2668,6 +2668,12 @@ alter_table_cmd:
 						c->alterDeferrability = true;
 					if ($4 & CAS_NO_INHERIT)
 						c->alterInheritability = true;
+					/* handle unsupported case with specific error message */
+					if ($4 & CAS_NOT_VALID)
+						ereport(ERROR,
+								errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+								errmsg("constraints cannot be altered to be NOT VALID"),
+								parser_errposition(@4));
 					processCASbits($4, @4, "FOREIGN KEY",
 								   &c->deferrable,
 								   &c->initdeferred,
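A minimal illustration of the new error path (the relation and constraint names here are hypothetical):

ALTER TABLE example_tbl ALTER CONSTRAINT example_tbl_pkey NOT VALID;
-- ERROR:  constraints cannot be altered to be NOT VALID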

View File

@@ -16,6 +16,22 @@ my $primary = PostgreSQL::Test::Cluster->new('primary');
 $primary->init(allows_streaming => 1);
 $primary->start;

+# Create file with some random data and an arbitrary size, useful to check
+# the solidity of the compression and decompression logic.  The size of the
+# file is chosen to be around 640kB.  This has proven to be large enough to
+# detect some issues related to LZ4, and low enough to not impact the runtime
+# of the test significantly.
+my $junk_data = $primary->safe_psql(
+	'postgres', qq(
+		SELECT string_agg(encode(sha256(i::bytea), 'hex'), '')
+		FROM generate_series(1, 10240) s(i);));
+my $data_dir = $primary->data_dir;
+my $junk_file = "$data_dir/junk";
+open my $jf, '>', $junk_file
+  or die "Could not create junk file: $!";
+print $jf $junk_data;
+close $jf;
+
 # Create a tablespace directory.
 my $source_ts_path = PostgreSQL::Test::Utils::tempdir_short();
@@ -52,6 +68,12 @@ my @test_configuration = (
 		'backup_archive' => [ 'base.tar.lz4', "$tsoid.tar.lz4" ],
 		'enabled' => check_pg_config("#define USE_LZ4 1")
 	},
+	{
+		'compression_method' => 'lz4',
+		'backup_flags' => [ '--compress', 'server-lz4:5' ],
+		'backup_archive' => [ 'base.tar.lz4', "$tsoid.tar.lz4" ],
+		'enabled' => check_pg_config("#define USE_LZ4 1")
+	},
 	{
 		'compression_method' => 'zstd',
 		'backup_flags' => [ '--compress', 'server-zstd' ],
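The size of the junk data generated above is easy to sanity-check from SQL. The query below only illustrates the arithmetic; int4send() is used here merely to obtain a bytea to hash and is not necessarily the expression the test itself uses.

SELECT length(string_agg(encode(sha256(int4send(i)), 'hex'), '')) AS junk_len
  FROM generate_series(1, 10240) s(i);
-- 10240 rows x 64 hex characters per sha256 digest = 655360 characters, ~640kB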

View File

@@ -15,6 +15,22 @@ my $primary = PostgreSQL::Test::Cluster->new('primary');
 $primary->init(allows_streaming => 1);
 $primary->start;

+# Create file with some random data and an arbitrary size, useful to check
+# the solidity of the compression and decompression logic.  The size of the
+# file is chosen to be around 640kB.  This has proven to be large enough to
+# detect some issues related to LZ4, and low enough to not impact the runtime
+# of the test significantly.
+my $junk_data = $primary->safe_psql(
+	'postgres', qq(
+		SELECT string_agg(encode(sha256(i::bytea), 'hex'), '')
+		FROM generate_series(1, 10240) s(i);));
+my $data_dir = $primary->data_dir;
+my $junk_file = "$data_dir/junk";
+open my $jf, '>', $junk_file
+  or die "Could not create junk file: $!";
+print $jf $junk_data;
+close $jf;
+
 my $backup_path = $primary->backup_dir . '/client-backup';
 my $extract_path = $primary->backup_dir . '/extracted-backup';
@@ -37,6 +53,12 @@ my @test_configuration = (
 		'backup_archive' => 'base.tar.lz4',
 		'enabled' => check_pg_config("#define USE_LZ4 1")
 	},
+	{
+		'compression_method' => 'lz4',
+		'backup_flags' => [ '--compress', 'client-lz4:1' ],
+		'backup_archive' => 'base.tar.lz4',
+		'enabled' => check_pg_config("#define USE_LZ4 1")
+	},
 	{
 		'compression_method' => 'zstd',
 		'backup_flags' => [ '--compress', 'client-zstd:5' ],

View File

@@ -322,9 +322,9 @@ astreamer_lz4_decompressor_content(astreamer *streamer,
 	mystreamer = (astreamer_lz4_frame *) streamer;

 	next_in = (uint8 *) data;
-	next_out = (uint8 *) mystreamer->base.bbs_buffer.data;
+	next_out = (uint8 *) mystreamer->base.bbs_buffer.data + mystreamer->bytes_written;
 	avail_in = len;
-	avail_out = mystreamer->base.bbs_buffer.maxlen;
+	avail_out = mystreamer->base.bbs_buffer.maxlen - mystreamer->bytes_written;

 	while (avail_in > 0)
 	{

View File

@@ -72,7 +72,7 @@ pg_comp_crc32c_dispatch(pg_crc32c crc, const void *data, size_t len)
 {
 	if (__builtin_constant_p(len) && len < 32)
 	{
-		const unsigned char *p = data;
+		const unsigned char *p = (const unsigned char *) data;

 		/*
 		 * For small constant inputs, inline the computation to avoid a

View File

@@ -137,6 +137,7 @@ PQcancelCreate(PGconn *conn)
 		goto oom_error;

 	originalHost = conn->connhost[conn->whichhost];
+	cancelConn->connhost[0].type = originalHost.type;
 	if (originalHost.host)
 	{
 		cancelConn->connhost[0].host = strdup(originalHost.host);

View File

@@ -195,54 +195,123 @@ ORDER BY proname DESC, proargtypes DESC, pronamespace DESC LIMIT 1;
 (1 row)

 --
--- Add coverage for RowCompare quals whose rhs row has a NULL that ends scan
---
-explain (costs off)
-SELECT proname, proargtypes, pronamespace
-   FROM pg_proc
-  WHERE proname = 'abs' AND (proname, proargtypes) < ('abs', NULL)
-  ORDER BY proname, proargtypes, pronamespace;
-                                                 QUERY PLAN
--------------------------------------------------------------------------------------------------------------
- Index Only Scan using pg_proc_proname_args_nsp_index on pg_proc
-   Index Cond: ((ROW(proname, proargtypes) < ROW('abs'::name, NULL::oidvector)) AND (proname = 'abs'::name))
-(2 rows)
-
-SELECT proname, proargtypes, pronamespace
-   FROM pg_proc
-  WHERE proname = 'abs' AND (proname, proargtypes) < ('abs', NULL)
-  ORDER BY proname, proargtypes, pronamespace;
- proname | proargtypes | pronamespace
----------+-------------+--------------
-(0 rows)
-
---
--- Add coverage for backwards scan RowCompare quals whose rhs row has a NULL
--- that ends scan
---
-explain (costs off)
-SELECT proname, proargtypes, pronamespace
-   FROM pg_proc
-  WHERE proname = 'abs' AND (proname, proargtypes) > ('abs', NULL)
-  ORDER BY proname DESC, proargtypes DESC, pronamespace DESC;
-                                                 QUERY PLAN
--------------------------------------------------------------------------------------------------------------
- Index Only Scan Backward using pg_proc_proname_args_nsp_index on pg_proc
-   Index Cond: ((ROW(proname, proargtypes) > ROW('abs'::name, NULL::oidvector)) AND (proname = 'abs'::name))
-(2 rows)
-
-SELECT proname, proargtypes, pronamespace
-   FROM pg_proc
-  WHERE proname = 'abs' AND (proname, proargtypes) > ('abs', NULL)
-  ORDER BY proname DESC, proargtypes DESC, pronamespace DESC;
- proname | proargtypes | pronamespace
----------+-------------+--------------
-(0 rows)
-
---
--- Add coverage for recheck of > key following array advancement on previous
--- (left sibling) page that used a high key whose attribute value corresponding
--- to the > key was -inf (due to being truncated when the high key was created).
+-- Forwards scan RowCompare qual whose row arg has a NULL that affects our
+-- initial positioning strategy
+--
+explain (costs off)
+SELECT proname, proargtypes, pronamespace
+   FROM pg_proc
+  WHERE (proname, proargtypes) >= ('abs', NULL) AND proname <= 'abs'
+  ORDER BY proname, proargtypes, pronamespace;
+                                                   QUERY PLAN
+---------------------------------------------------------------------------------------------------------------
+ Index Only Scan using pg_proc_proname_args_nsp_index on pg_proc
+   Index Cond: ((ROW(proname, proargtypes) >= ROW('abs'::name, NULL::oidvector)) AND (proname <= 'abs'::name))
+(2 rows)
+
+SELECT proname, proargtypes, pronamespace
+   FROM pg_proc
+  WHERE (proname, proargtypes) >= ('abs', NULL) AND proname <= 'abs'
+  ORDER BY proname, proargtypes, pronamespace;
+ proname | proargtypes | pronamespace
+---------+-------------+--------------
+(0 rows)
+
+--
+-- Forwards scan RowCompare quals whose row arg has a NULL that ends scan
+--
+explain (costs off)
+SELECT proname, proargtypes, pronamespace
+   FROM pg_proc
+  WHERE proname >= 'abs' AND (proname, proargtypes) < ('abs', NULL)
+  ORDER BY proname, proargtypes, pronamespace;
+                                                  QUERY PLAN
+--------------------------------------------------------------------------------------------------------------
+ Index Only Scan using pg_proc_proname_args_nsp_index on pg_proc
+   Index Cond: ((proname >= 'abs'::name) AND (ROW(proname, proargtypes) < ROW('abs'::name, NULL::oidvector)))
+(2 rows)
+
+SELECT proname, proargtypes, pronamespace
+   FROM pg_proc
+  WHERE proname >= 'abs' AND (proname, proargtypes) < ('abs', NULL)
+  ORDER BY proname, proargtypes, pronamespace;
+ proname | proargtypes | pronamespace
+---------+-------------+--------------
+(0 rows)
+
+--
+-- Backwards scan RowCompare qual whose row arg has a NULL that affects our
+-- initial positioning strategy
+--
+explain (costs off)
+SELECT proname, proargtypes, pronamespace
+   FROM pg_proc
+  WHERE proname >= 'abs' AND (proname, proargtypes) <= ('abs', NULL)
+  ORDER BY proname DESC, proargtypes DESC, pronamespace DESC;
+                                                   QUERY PLAN
+---------------------------------------------------------------------------------------------------------------
+ Index Only Scan Backward using pg_proc_proname_args_nsp_index on pg_proc
+   Index Cond: ((proname >= 'abs'::name) AND (ROW(proname, proargtypes) <= ROW('abs'::name, NULL::oidvector)))
+(2 rows)
+
+SELECT proname, proargtypes, pronamespace
+   FROM pg_proc
+  WHERE proname >= 'abs' AND (proname, proargtypes) <= ('abs', NULL)
+  ORDER BY proname DESC, proargtypes DESC, pronamespace DESC;
+ proname | proargtypes | pronamespace
+---------+-------------+--------------
+(0 rows)
+
+--
+-- Backwards scan RowCompare qual whose row arg has a NULL that ends scan
+--
+explain (costs off)
+SELECT proname, proargtypes, pronamespace
+   FROM pg_proc
+  WHERE (proname, proargtypes) > ('abs', NULL) AND proname <= 'abs'
+  ORDER BY proname DESC, proargtypes DESC, pronamespace DESC;
+                                                  QUERY PLAN
+--------------------------------------------------------------------------------------------------------------
+ Index Only Scan Backward using pg_proc_proname_args_nsp_index on pg_proc
+   Index Cond: ((ROW(proname, proargtypes) > ROW('abs'::name, NULL::oidvector)) AND (proname <= 'abs'::name))
+(2 rows)
+
+SELECT proname, proargtypes, pronamespace
+   FROM pg_proc
+  WHERE (proname, proargtypes) > ('abs', NULL) AND proname <= 'abs'
+  ORDER BY proname DESC, proargtypes DESC, pronamespace DESC;
+ proname | proargtypes | pronamespace
+---------+-------------+--------------
+(0 rows)
+
+-- Makes B-Tree preprocessing deal with unmarking redundant keys that were
+-- initially marked required (test case relies on current row compare
+-- preprocessing limitations)
+explain (costs off)
+SELECT proname, proargtypes, pronamespace
+   FROM pg_proc
+  WHERE proname = 'zzzzzz' AND (proname, proargtypes) > ('abs', NULL)
+    AND pronamespace IN (1, 2, 3) AND proargtypes IN ('26 23', '5077')
+  ORDER BY proname, proargtypes, pronamespace;
+                                                                                                      QUERY PLAN
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Index Only Scan using pg_proc_proname_args_nsp_index on pg_proc
+   Index Cond: ((ROW(proname, proargtypes) > ROW('abs'::name, NULL::oidvector)) AND (proname = 'zzzzzz'::name) AND (proargtypes = ANY ('{"26 23",5077}'::oidvector[])) AND (pronamespace = ANY ('{1,2,3}'::oid[])))
+(2 rows)
+
+SELECT proname, proargtypes, pronamespace
+   FROM pg_proc
+  WHERE proname = 'zzzzzz' AND (proname, proargtypes) > ('abs', NULL)
+    AND pronamespace IN (1, 2, 3) AND proargtypes IN ('26 23', '5077')
+  ORDER BY proname, proargtypes, pronamespace;
+ proname | proargtypes | pronamespace
+---------+-------------+--------------
+(0 rows)
+
+--
+-- Performs a recheck of > key following array advancement on previous (left
+-- sibling) page that used a high key whose attribute value corresponding to
+-- the > key was -inf (due to being truncated when the high key was created).
 --
 -- XXX This relies on the assumption that tenk1_thous_tenthous has a truncated
 -- high key "(183, -inf)" on the first page that we'll scan.  The test will only

View File

@@ -748,6 +748,11 @@ ALTER TABLE unique_tbl ALTER CONSTRAINT unique_tbl_i_key ENFORCED;
 ERROR:  cannot alter enforceability of constraint "unique_tbl_i_key" of relation "unique_tbl"
 ALTER TABLE unique_tbl ALTER CONSTRAINT unique_tbl_i_key NOT ENFORCED;
 ERROR:  cannot alter enforceability of constraint "unique_tbl_i_key" of relation "unique_tbl"
+-- can't make an existing constraint NOT VALID
+ALTER TABLE unique_tbl ALTER CONSTRAINT unique_tbl_i_key NOT VALID;
+ERROR:  constraints cannot be altered to be NOT VALID
+LINE 1: ...ABLE unique_tbl ALTER CONSTRAINT unique_tbl_i_key NOT VALID;
+                                                             ^
 DROP TABLE unique_tbl;
 --
 -- EXCLUDE constraints

View File

@@ -1359,7 +1359,7 @@ LINE 1: ...e ALTER CONSTRAINT fktable_fk_fkey NOT DEFERRABLE INITIALLY ...
 ALTER TABLE fktable ALTER CONSTRAINT fktable_fk_fkey NO INHERIT;
 ERROR:  constraint "fktable_fk_fkey" of relation "fktable" is not a not-null constraint
 ALTER TABLE fktable ALTER CONSTRAINT fktable_fk_fkey NOT VALID;
-ERROR:  FOREIGN KEY constraints cannot be marked NOT VALID
+ERROR:  constraints cannot be altered to be NOT VALID
 LINE 1: ...ER TABLE fktable ALTER CONSTRAINT fktable_fk_fkey NOT VALID;
                                                              ^
 ALTER TABLE fktable ALTER CONSTRAINT fktable_fk_fkey ENFORCED NOT ENFORCED;

View File

@@ -143,38 +143,83 @@ SELECT proname, proargtypes, pronamespace
 ORDER BY proname DESC, proargtypes DESC, pronamespace DESC LIMIT 1;

 --
--- Add coverage for RowCompare quals whose rhs row has a NULL that ends scan
---
-explain (costs off)
-SELECT proname, proargtypes, pronamespace
-   FROM pg_proc
-  WHERE proname = 'abs' AND (proname, proargtypes) < ('abs', NULL)
-  ORDER BY proname, proargtypes, pronamespace;
-
-SELECT proname, proargtypes, pronamespace
-   FROM pg_proc
-  WHERE proname = 'abs' AND (proname, proargtypes) < ('abs', NULL)
-  ORDER BY proname, proargtypes, pronamespace;
-
---
--- Add coverage for backwards scan RowCompare quals whose rhs row has a NULL
--- that ends scan
---
-explain (costs off)
-SELECT proname, proargtypes, pronamespace
-   FROM pg_proc
-  WHERE proname = 'abs' AND (proname, proargtypes) > ('abs', NULL)
-  ORDER BY proname DESC, proargtypes DESC, pronamespace DESC;
-
-SELECT proname, proargtypes, pronamespace
-   FROM pg_proc
-  WHERE proname = 'abs' AND (proname, proargtypes) > ('abs', NULL)
-  ORDER BY proname DESC, proargtypes DESC, pronamespace DESC;
-
---
--- Add coverage for recheck of > key following array advancement on previous
--- (left sibling) page that used a high key whose attribute value corresponding
--- to the > key was -inf (due to being truncated when the high key was created).
+-- Forwards scan RowCompare qual whose row arg has a NULL that affects our
+-- initial positioning strategy
+--
+explain (costs off)
+SELECT proname, proargtypes, pronamespace
+   FROM pg_proc
+  WHERE (proname, proargtypes) >= ('abs', NULL) AND proname <= 'abs'
+  ORDER BY proname, proargtypes, pronamespace;
+
+SELECT proname, proargtypes, pronamespace
+   FROM pg_proc
+  WHERE (proname, proargtypes) >= ('abs', NULL) AND proname <= 'abs'
+  ORDER BY proname, proargtypes, pronamespace;
+
+--
+-- Forwards scan RowCompare quals whose row arg has a NULL that ends scan
+--
+explain (costs off)
+SELECT proname, proargtypes, pronamespace
+   FROM pg_proc
+  WHERE proname >= 'abs' AND (proname, proargtypes) < ('abs', NULL)
+  ORDER BY proname, proargtypes, pronamespace;
+
+SELECT proname, proargtypes, pronamespace
+   FROM pg_proc
+  WHERE proname >= 'abs' AND (proname, proargtypes) < ('abs', NULL)
+  ORDER BY proname, proargtypes, pronamespace;
+
+--
+-- Backwards scan RowCompare qual whose row arg has a NULL that affects our
+-- initial positioning strategy
+--
+explain (costs off)
+SELECT proname, proargtypes, pronamespace
+   FROM pg_proc
+  WHERE proname >= 'abs' AND (proname, proargtypes) <= ('abs', NULL)
+  ORDER BY proname DESC, proargtypes DESC, pronamespace DESC;
+
+SELECT proname, proargtypes, pronamespace
+   FROM pg_proc
+  WHERE proname >= 'abs' AND (proname, proargtypes) <= ('abs', NULL)
+  ORDER BY proname DESC, proargtypes DESC, pronamespace DESC;
+
+--
+-- Backwards scan RowCompare qual whose row arg has a NULL that ends scan
+--
+explain (costs off)
+SELECT proname, proargtypes, pronamespace
+   FROM pg_proc
+  WHERE (proname, proargtypes) > ('abs', NULL) AND proname <= 'abs'
+  ORDER BY proname DESC, proargtypes DESC, pronamespace DESC;
+
+SELECT proname, proargtypes, pronamespace
+   FROM pg_proc
+  WHERE (proname, proargtypes) > ('abs', NULL) AND proname <= 'abs'
+  ORDER BY proname DESC, proargtypes DESC, pronamespace DESC;
+
+-- Makes B-Tree preprocessing deal with unmarking redundant keys that were
+-- initially marked required (test case relies on current row compare
+-- preprocessing limitations)
+explain (costs off)
+SELECT proname, proargtypes, pronamespace
+   FROM pg_proc
+  WHERE proname = 'zzzzzz' AND (proname, proargtypes) > ('abs', NULL)
+    AND pronamespace IN (1, 2, 3) AND proargtypes IN ('26 23', '5077')
+  ORDER BY proname, proargtypes, pronamespace;
+
+SELECT proname, proargtypes, pronamespace
+   FROM pg_proc
+  WHERE proname = 'zzzzzz' AND (proname, proargtypes) > ('abs', NULL)
+    AND pronamespace IN (1, 2, 3) AND proargtypes IN ('26 23', '5077')
+  ORDER BY proname, proargtypes, pronamespace;
+
+--
+-- Performs a recheck of > key following array advancement on previous (left
+-- sibling) page that used a high key whose attribute value corresponding to
+-- the > key was -inf (due to being truncated when the high key was created).
 --
 -- XXX This relies on the assumption that tenk1_thous_tenthous has a truncated
 -- high key "(183, -inf)" on the first page that we'll scan.  The test will only

View File

@@ -537,6 +537,9 @@ CREATE TABLE UNIQUE_NOTEN_TBL(i int UNIQUE NOT ENFORCED);
 ALTER TABLE unique_tbl ALTER CONSTRAINT unique_tbl_i_key ENFORCED;
 ALTER TABLE unique_tbl ALTER CONSTRAINT unique_tbl_i_key NOT ENFORCED;

+-- can't make an existing constraint NOT VALID
+ALTER TABLE unique_tbl ALTER CONSTRAINT unique_tbl_i_key NOT VALID;
+
 DROP TABLE unique_tbl;
 --