Separate TBM[Shared|Private]Iterator and TBMIterateResult

Remove the TBMIterateResult member from the TBMPrivateIterator and
TBMSharedIterator and make tbm_[shared|private_]iterate() take a
TBMIterateResult as a parameter.

This allows tidbitmap API users to manage multiple TBMIterateResults per
scan. This is required for bitmap heap scan to use the read stream API,
with which there may be multiple I/Os in flight at once, each one with a
TBMIterateResult.

Reviewed-by: Tomas Vondra <tomas@vondra.me>
Discussion: https://postgr.es/m/d4bb26c9-fe07-439e-ac53-c0e244387e01%40vondra.me
This commit is contained in:
Melanie Plageman 2025-03-15 10:10:51 -04:00
parent 799959dc7c
commit 944e81bf99
7 changed files with 110 additions and 101 deletions

View File

@ -332,8 +332,8 @@ restartScanEntry:
entry->list = NULL;
entry->nlist = 0;
entry->matchBitmap = NULL;
entry->matchResult = NULL;
entry->matchNtuples = -1;
entry->matchResult.blockno = InvalidBlockNumber;
entry->reduceResult = false;
entry->predictNumberResult = 0;
@ -827,20 +827,19 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
{
/*
* If we've exhausted all items on this block, move to next block
* in the bitmap.
* in the bitmap. tbm_private_iterate() sets matchResult.blockno
* to InvalidBlockNumber when the bitmap is exhausted.
*/
while (entry->matchResult == NULL ||
(!entry->matchResult->lossy &&
while ((!BlockNumberIsValid(entry->matchResult.blockno)) ||
(!entry->matchResult.lossy &&
entry->offset >= entry->matchNtuples) ||
entry->matchResult->blockno < advancePastBlk ||
entry->matchResult.blockno < advancePastBlk ||
(ItemPointerIsLossyPage(&advancePast) &&
entry->matchResult->blockno == advancePastBlk))
entry->matchResult.blockno == advancePastBlk))
{
entry->matchResult =
tbm_private_iterate(entry->matchIterator);
if (entry->matchResult == NULL)
if (!tbm_private_iterate(entry->matchIterator, &entry->matchResult))
{
Assert(!BlockNumberIsValid(entry->matchResult.blockno));
ItemPointerSetInvalid(&entry->curItem);
tbm_end_private_iterate(entry->matchIterator);
entry->matchIterator = NULL;
@ -849,14 +848,14 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
}
/* Exact pages need their tuple offsets extracted. */
if (!entry->matchResult->lossy)
entry->matchNtuples = tbm_extract_page_tuple(entry->matchResult,
if (!entry->matchResult.lossy)
entry->matchNtuples = tbm_extract_page_tuple(&entry->matchResult,
entry->matchOffsets,
TBM_MAX_TUPLES_PER_PAGE);
/*
* Reset counter to the beginning of entry->matchResult. Note:
* entry->offset is still greater than entry->matchNtuples if
* entry->offset is still greater than matchResult.ntuples if
* matchResult is lossy. So, on next call we will get next
* result from TIDBitmap.
*/
@ -869,10 +868,10 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
* We're now on the first page after advancePast which has any
* items on it. If it's a lossy result, return that.
*/
if (entry->matchResult->lossy)
if (entry->matchResult.lossy)
{
ItemPointerSetLossyPage(&entry->curItem,
entry->matchResult->blockno);
entry->matchResult.blockno);
/*
* We might as well fall out of the loop; we could not
@ -889,7 +888,7 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
Assert(entry->matchNtuples > -1);
/* Skip over any offsets <= advancePast, and return that. */
if (entry->matchResult->blockno == advancePastBlk)
if (entry->matchResult.blockno == advancePastBlk)
{
Assert(entry->matchNtuples > 0);
@ -910,7 +909,7 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
}
ItemPointerSet(&entry->curItem,
entry->matchResult->blockno,
entry->matchResult.blockno,
entry->matchOffsets[entry->offset]);
entry->offset++;

View File

@ -111,7 +111,7 @@ ginFillScanEntry(GinScanOpaque so, OffsetNumber attnum,
ItemPointerSetMin(&scanEntry->curItem);
scanEntry->matchBitmap = NULL;
scanEntry->matchIterator = NULL;
scanEntry->matchResult = NULL;
scanEntry->matchResult.blockno = InvalidBlockNumber;
scanEntry->matchNtuples = -1;
scanEntry->list = NULL;
scanEntry->nlist = 0;

View File

@ -2126,7 +2126,7 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
Buffer buffer;
Snapshot snapshot;
int ntup;
TBMIterateResult *tbmres;
TBMIterateResult tbmres;
OffsetNumber offsets[TBM_MAX_TUPLES_PER_PAGE];
int noffsets = -1;
@ -2142,14 +2142,12 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
{
CHECK_FOR_INTERRUPTS();
tbmres = tbm_iterate(&scan->st.rs_tbmiterator);
if (tbmres == NULL)
if (!tbm_iterate(&scan->st.rs_tbmiterator, &tbmres))
return false;
/* Exact pages need their tuple offsets extracted. */
if (!tbmres->lossy)
noffsets = tbm_extract_page_tuple(tbmres, offsets,
if (!tbmres.lossy)
noffsets = tbm_extract_page_tuple(&tbmres, offsets,
TBM_MAX_TUPLES_PER_PAGE);
/*
@ -2161,11 +2159,11 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
* reachable by the index.
*/
} while (!IsolationIsSerializable() &&
tbmres->blockno >= hscan->rs_nblocks);
tbmres.blockno >= hscan->rs_nblocks);
/* Got a valid block */
*blockno = tbmres->blockno;
*recheck = tbmres->recheck;
*blockno = tbmres.blockno;
*recheck = tbmres.recheck;
/*
* We can skip fetching the heap page if we don't need any fields from the
@ -2173,11 +2171,11 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
* page are visible to our transaction.
*/
if (!(scan->rs_flags & SO_NEED_TUPLES) &&
!tbmres->recheck &&
VM_ALL_VISIBLE(scan->rs_rd, tbmres->blockno, &bscan->rs_vmbuffer))
!tbmres.recheck &&
VM_ALL_VISIBLE(scan->rs_rd, tbmres.blockno, &bscan->rs_vmbuffer))
{
/* can't be lossy in the skip_fetch case */
Assert(!tbmres->lossy);
Assert(!tbmres.lossy);
Assert(bscan->rs_empty_tuples_pending >= 0);
Assert(noffsets > -1);
@ -2186,7 +2184,7 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
return true;
}
block = tbmres->blockno;
block = tbmres.blockno;
/*
* Acquire pin on the target heap page, trading in any pin we held before.
@ -2215,7 +2213,7 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
/*
* We need two separate strategies for lossy and non-lossy cases.
*/
if (!tbmres->lossy)
if (!tbmres.lossy)
{
/*
* Bitmap is non-lossy, so we just look through the offsets listed in
@ -2279,7 +2277,7 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
Assert(ntup <= MaxHeapTuplesPerPage);
hscan->rs_ntuples = ntup;
if (tbmres->lossy)
if (tbmres.lossy)
(*lossy_pages)++;
else
(*exact_pages)++;

View File

@ -317,7 +317,7 @@ BitmapAdjustPrefetchIterator(BitmapHeapScanState *node)
{
#ifdef USE_PREFETCH
ParallelBitmapHeapState *pstate = node->pstate;
TBMIterateResult *tbmpre;
TBMIterateResult tbmpre;
if (pstate == NULL)
{
@ -330,9 +330,8 @@ BitmapAdjustPrefetchIterator(BitmapHeapScanState *node)
}
else if (!tbm_exhausted(prefetch_iterator))
{
tbmpre = tbm_iterate(prefetch_iterator);
node->prefetch_blockno = tbmpre ? tbmpre->blockno :
InvalidBlockNumber;
tbm_iterate(prefetch_iterator, &tbmpre);
node->prefetch_blockno = tbmpre.blockno;
}
return;
}
@ -371,9 +370,8 @@ BitmapAdjustPrefetchIterator(BitmapHeapScanState *node)
*/
if (!tbm_exhausted(prefetch_iterator))
{
tbmpre = tbm_iterate(prefetch_iterator);
node->prefetch_blockno = tbmpre ? tbmpre->blockno :
InvalidBlockNumber;
tbm_iterate(prefetch_iterator, &tbmpre);
node->prefetch_blockno = tbmpre.blockno;
}
}
}
@ -441,17 +439,18 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
{
while (node->prefetch_pages < node->prefetch_target)
{
TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
TBMIterateResult tbmpre;
bool skip_fetch;
if (tbmpre == NULL)
if (!tbm_iterate(prefetch_iterator, &tbmpre))
{
/* No more pages to prefetch */
Assert(!BlockNumberIsValid(tbmpre.blockno));
tbm_end_iterate(prefetch_iterator);
break;
}
node->prefetch_pages++;
node->prefetch_blockno = tbmpre->blockno;
node->prefetch_blockno = tbmpre.blockno;
/*
* If we expect not to have to actually read this heap page,
@ -460,13 +459,13 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
* prefetch_pages?)
*/
skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
!tbmpre->recheck &&
!tbmpre.recheck &&
VM_ALL_VISIBLE(node->ss.ss_currentRelation,
tbmpre->blockno,
tbmpre.blockno,
&node->pvmbuffer));
if (!skip_fetch)
PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre.blockno);
}
}
@ -481,7 +480,7 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
{
while (1)
{
TBMIterateResult *tbmpre;
TBMIterateResult tbmpre;
bool do_prefetch = false;
bool skip_fetch;
@ -500,25 +499,25 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
if (!do_prefetch)
return;
tbmpre = tbm_iterate(prefetch_iterator);
if (tbmpre == NULL)
if (!tbm_iterate(prefetch_iterator, &tbmpre))
{
Assert(!BlockNumberIsValid(tbmpre.blockno));
/* No more pages to prefetch */
tbm_end_iterate(prefetch_iterator);
break;
}
node->prefetch_blockno = tbmpre->blockno;
node->prefetch_blockno = tbmpre.blockno;
/* As above, skip prefetch if we expect not to need page */
skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
!tbmpre->recheck &&
!tbmpre.recheck &&
VM_ALL_VISIBLE(node->ss.ss_currentRelation,
tbmpre->blockno,
tbmpre.blockno,
&node->pvmbuffer));
if (!skip_fetch)
PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre.blockno);
}
}
}

View File

@ -172,7 +172,6 @@ struct TBMPrivateIterator
int spageptr; /* next spages index */
int schunkptr; /* next schunks index */
int schunkbit; /* next bit to check in current schunk */
TBMIterateResult output;
};
/*
@ -213,7 +212,6 @@ struct TBMSharedIterator
PTEntryArray *ptbase; /* pagetable element array */
PTIterationArray *ptpages; /* sorted exact page index list */
PTIterationArray *ptchunks; /* sorted lossy page index list */
TBMIterateResult output;
};
/* Local function prototypes */
@ -957,21 +955,28 @@ tbm_advance_schunkbit(PagetableEntry *chunk, int *schunkbitp)
/*
* tbm_private_iterate - scan through next page of a TIDBitmap
*
* Returns a TBMIterateResult representing one page, or NULL if there are
* no more pages to scan. Pages are guaranteed to be delivered in numerical
* order. If lossy is true, then the bitmap is "lossy" and failed to
* remember the exact tuples to look at on this page --- the caller must
* examine all tuples on the page and check if they meet the intended
* condition. result->ntuples is set to -1 when the bitmap is lossy.
* If result->recheck is true, only the indicated tuples need
* be examined, but the condition must be rechecked anyway. (For ease of
* testing, recheck is always set true when lossy is true.)
* Caller must pass in a TBMIterateResult to be filled.
*
* Pages are guaranteed to be delivered in numerical order.
*
* Returns false when there are no more pages to scan and true otherwise. When
* there are no more pages to scan, tbmres->blockno is set to
* InvalidBlockNumber.
*
* If lossy is true, then the bitmap is "lossy" and failed to remember
* the exact tuples to look at on this page --- the caller must examine all
* tuples on the page and check if they meet the intended condition. If lossy
* is false, the caller must later extract the tuple offsets from the page
* pointed to by internal_page with tbm_extract_page_tuple.
*
* If tbmres->recheck is true, only the indicated tuples need be examined, but
* the condition must be rechecked anyway. (For ease of testing, recheck is
* always set true when lossy is true.)
*/
TBMIterateResult *
tbm_private_iterate(TBMPrivateIterator *iterator)
bool
tbm_private_iterate(TBMPrivateIterator *iterator, TBMIterateResult *tbmres)
{
TIDBitmap *tbm = iterator->tbm;
TBMIterateResult *output = &(iterator->output);
Assert(tbm->iterating == TBM_ITERATING_PRIVATE);
@ -1009,12 +1014,12 @@ tbm_private_iterate(TBMPrivateIterator *iterator)
chunk_blockno < tbm->spages[iterator->spageptr]->blockno)
{
/* Return a lossy page indicator from the chunk */
output->blockno = chunk_blockno;
output->lossy = true;
output->recheck = true;
output->internal_page = NULL;
tbmres->blockno = chunk_blockno;
tbmres->lossy = true;
tbmres->recheck = true;
tbmres->internal_page = NULL;
iterator->schunkbit++;
return output;
return true;
}
}
@ -1028,16 +1033,17 @@ tbm_private_iterate(TBMPrivateIterator *iterator)
else
page = tbm->spages[iterator->spageptr];
output->internal_page = page;
output->blockno = page->blockno;
output->lossy = false;
output->recheck = page->recheck;
tbmres->internal_page = page;
tbmres->blockno = page->blockno;
tbmres->lossy = false;
tbmres->recheck = page->recheck;
iterator->spageptr++;
return output;
return true;
}
/* Nothing more in the bitmap */
return NULL;
tbmres->blockno = InvalidBlockNumber;
return false;
}
/*
@ -1047,10 +1053,9 @@ tbm_private_iterate(TBMPrivateIterator *iterator)
* across multiple processes. We need to acquire the iterator LWLock,
* before accessing the shared members.
*/
TBMIterateResult *
tbm_shared_iterate(TBMSharedIterator *iterator)
bool
tbm_shared_iterate(TBMSharedIterator *iterator, TBMIterateResult *tbmres)
{
TBMIterateResult *output = &iterator->output;
TBMSharedIteratorState *istate = iterator->state;
PagetableEntry *ptbase = NULL;
int *idxpages = NULL;
@ -1101,14 +1106,14 @@ tbm_shared_iterate(TBMSharedIterator *iterator)
chunk_blockno < ptbase[idxpages[istate->spageptr]].blockno)
{
/* Return a lossy page indicator from the chunk */
output->blockno = chunk_blockno;
output->lossy = true;
output->recheck = true;
output->internal_page = NULL;
tbmres->blockno = chunk_blockno;
tbmres->lossy = true;
tbmres->recheck = true;
tbmres->internal_page = NULL;
istate->schunkbit++;
LWLockRelease(&istate->lock);
return output;
return true;
}
}
@ -1116,21 +1121,22 @@ tbm_shared_iterate(TBMSharedIterator *iterator)
{
PagetableEntry *page = &ptbase[idxpages[istate->spageptr]];
output->internal_page = page;
output->blockno = page->blockno;
output->lossy = false;
output->recheck = page->recheck;
tbmres->internal_page = page;
tbmres->blockno = page->blockno;
tbmres->lossy = false;
tbmres->recheck = page->recheck;
istate->spageptr++;
LWLockRelease(&istate->lock);
return output;
return true;
}
LWLockRelease(&istate->lock);
/* Nothing more in the bitmap */
return NULL;
tbmres->blockno = InvalidBlockNumber;
return false;
}
/*
@ -1604,15 +1610,17 @@ tbm_end_iterate(TBMIterator *iterator)
}
/*
* Get the next TBMIterateResult from the shared or private bitmap iterator.
* Populate the next TBMIterateResult using the shared or private bitmap
* iterator. Returns false when there is nothing more to scan.
*/
TBMIterateResult *
tbm_iterate(TBMIterator *iterator)
bool
tbm_iterate(TBMIterator *iterator, TBMIterateResult *tbmres)
{
Assert(iterator);
Assert(tbmres);
if (iterator->shared)
return tbm_shared_iterate(iterator->i.shared_iterator);
return tbm_shared_iterate(iterator->i.shared_iterator, tbmres);
else
return tbm_private_iterate(iterator->i.private_iterator);
return tbm_private_iterate(iterator->i.private_iterator, tbmres);
}

View File

@ -354,7 +354,12 @@ typedef struct GinScanEntryData
/* for a partial-match or full-scan query, we accumulate all TIDs here */
TIDBitmap *matchBitmap;
TBMPrivateIterator *matchIterator;
TBMIterateResult *matchResult;
/*
* If blockno is InvalidBlockNumber, all of the other fields in the
* matchResult are meaningless.
*/
TBMIterateResult matchResult;
OffsetNumber matchOffsets[TBM_MAX_TUPLES_PER_PAGE];
int matchNtuples;

View File

@ -101,8 +101,8 @@ extern bool tbm_is_empty(const TIDBitmap *tbm);
extern TBMPrivateIterator *tbm_begin_private_iterate(TIDBitmap *tbm);
extern dsa_pointer tbm_prepare_shared_iterate(TIDBitmap *tbm);
extern TBMIterateResult *tbm_private_iterate(TBMPrivateIterator *iterator);
extern TBMIterateResult *tbm_shared_iterate(TBMSharedIterator *iterator);
extern bool tbm_private_iterate(TBMPrivateIterator *iterator, TBMIterateResult *tbmres);
extern bool tbm_shared_iterate(TBMSharedIterator *iterator, TBMIterateResult *tbmres);
extern void tbm_end_private_iterate(TBMPrivateIterator *iterator);
extern void tbm_end_shared_iterate(TBMSharedIterator *iterator);
extern TBMSharedIterator *tbm_attach_shared_iterate(dsa_area *dsa,
@ -113,7 +113,7 @@ extern TBMIterator tbm_begin_iterate(TIDBitmap *tbm,
dsa_area *dsa, dsa_pointer dsp);
extern void tbm_end_iterate(TBMIterator *iterator);
extern TBMIterateResult *tbm_iterate(TBMIterator *iterator);
extern bool tbm_iterate(TBMIterator *iterator, TBMIterateResult *tbmres);
static inline bool
tbm_exhausted(TBMIterator *iterator)