mirror of
https://github.com/postgres/postgres.git
synced 2025-05-18 00:02:16 -04:00
BRIN de-summarization
When the BRIN summary tuple for a page range becomes too "wide" for the values actually stored in the table (because the tuples that were present originally are no longer present due to updates or deletes), it can be useful to remove the outdated summary tuple, so that a future summarization can install a tighter summary. This commit introduces a SQL-callable interface to do so. Author: Álvaro Herrera Reviewed-by: Eiji Seki Discussion: https://postgr.es/m/20170228045643.n2ri74ara4fhhfxf@alvherre.pgsql
This commit is contained in:
parent
3a82129a40
commit
c655899ba9
@ -80,6 +80,10 @@
|
||||
or by automatic summarization executed by autovacuum, as insertions
|
||||
occur. (This last trigger is disabled by default and can be enabled
|
||||
with the <literal>autosummarize</literal> parameter.)
|
||||
Conversely, a range can be de-summarized using the
|
||||
<function>brin_desummarize_range(regclass, bigint)</function> function,
|
||||
which is useful when the index tuple is no longer a very good
|
||||
representation because the existing values have changed.
|
||||
</para>
|
||||
|
||||
</sect2>
|
||||
|
@ -19660,6 +19660,14 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup());
|
||||
<primary>gin_clean_pending_list</primary>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>brin_summarize_range</primary>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>brin_desummarize_range</primary>
|
||||
</indexterm>
|
||||
|
||||
<para>
|
||||
<xref linkend="functions-admin-index-table"> shows the functions
|
||||
available for index maintenance tasks.
|
||||
@ -19690,6 +19698,13 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup());
|
||||
<entry><type>integer</type></entry>
|
||||
<entry>summarize the page range covering the given block, if not already summarized</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry>
|
||||
<literal><function>brin_desummarize_range(<parameter>index</> <type>regclass</>, <parameter>blockNumber</> <type>bigint</type>)</function></literal>
|
||||
</entry>
|
||||
<entry><type>void</type></entry>
|
||||
<entry>de-summarize the page range covering the given block, if summarized</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry>
|
||||
<literal><function>gin_clean_pending_list(<parameter>index</> <type>regclass</>)</function></literal>
|
||||
|
@ -908,6 +908,80 @@ brin_summarize_range(PG_FUNCTION_ARGS)
|
||||
PG_RETURN_INT32((int32) numSummarized);
|
||||
}
|
||||
|
||||
/*
|
||||
* SQL-callable interface to mark a range as no longer summarized
|
||||
*/
|
||||
Datum
|
||||
brin_desummarize_range(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Oid indexoid = PG_GETARG_OID(0);
|
||||
int64 heapBlk64 = PG_GETARG_INT64(1);
|
||||
BlockNumber heapBlk;
|
||||
Oid heapoid;
|
||||
Relation heapRel;
|
||||
Relation indexRel;
|
||||
bool done;
|
||||
|
||||
if (heapBlk64 > MaxBlockNumber || heapBlk64 < 0)
|
||||
{
|
||||
char *blk = psprintf(INT64_FORMAT, heapBlk64);
|
||||
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
|
||||
errmsg("block number out of range: %s", blk)));
|
||||
}
|
||||
heapBlk = (BlockNumber) heapBlk64;
|
||||
|
||||
/*
|
||||
* We must lock table before index to avoid deadlocks. However, if the
|
||||
* passed indexoid isn't an index then IndexGetRelation() will fail.
|
||||
* Rather than emitting a not-very-helpful error message, postpone
|
||||
* complaining, expecting that the is-it-an-index test below will fail.
|
||||
*/
|
||||
heapoid = IndexGetRelation(indexoid, true);
|
||||
if (OidIsValid(heapoid))
|
||||
heapRel = heap_open(heapoid, ShareUpdateExclusiveLock);
|
||||
else
|
||||
heapRel = NULL;
|
||||
|
||||
indexRel = index_open(indexoid, ShareUpdateExclusiveLock);
|
||||
|
||||
/* Must be a BRIN index */
|
||||
if (indexRel->rd_rel->relkind != RELKIND_INDEX ||
|
||||
indexRel->rd_rel->relam != BRIN_AM_OID)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
|
||||
errmsg("\"%s\" is not a BRIN index",
|
||||
RelationGetRelationName(indexRel))));
|
||||
|
||||
/* User must own the index (comparable to privileges needed for VACUUM) */
|
||||
if (!pg_class_ownercheck(indexoid, GetUserId()))
|
||||
aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_CLASS,
|
||||
RelationGetRelationName(indexRel));
|
||||
|
||||
/*
|
||||
* Since we did the IndexGetRelation call above without any lock, it's
|
||||
* barely possible that a race against an index drop/recreation could have
|
||||
* netted us the wrong table. Recheck.
|
||||
*/
|
||||
if (heapRel == NULL || heapoid != IndexGetRelation(indexoid, false))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_UNDEFINED_TABLE),
|
||||
errmsg("could not open parent table of index %s",
|
||||
RelationGetRelationName(indexRel))));
|
||||
|
||||
/* the revmap does the hard work */
|
||||
do {
|
||||
done = brinRevmapDesummarizeRange(indexRel, heapBlk);
|
||||
}
|
||||
while (!done);
|
||||
|
||||
relation_close(indexRel, ShareUpdateExclusiveLock);
|
||||
relation_close(heapRel, ShareUpdateExclusiveLock);
|
||||
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
/*
|
||||
* Build a BrinDesc used to create or scan a BRIN index
|
||||
*/
|
||||
|
@ -168,9 +168,12 @@ brinSetHeapBlockItemptr(Buffer buf, BlockNumber pagesPerRange,
|
||||
iptr = (ItemPointerData *) contents->rm_tids;
|
||||
iptr += HEAPBLK_TO_REVMAP_INDEX(pagesPerRange, heapBlk);
|
||||
|
||||
ItemPointerSet(iptr,
|
||||
ItemPointerGetBlockNumber(&tid),
|
||||
ItemPointerGetOffsetNumber(&tid));
|
||||
if (ItemPointerIsValid(&tid))
|
||||
ItemPointerSet(iptr,
|
||||
ItemPointerGetBlockNumber(&tid),
|
||||
ItemPointerGetOffsetNumber(&tid));
|
||||
else
|
||||
ItemPointerSetInvalid(iptr);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -304,6 +307,137 @@ brinGetTupleForHeapBlock(BrinRevmap *revmap, BlockNumber heapBlk,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Delete an index tuple, marking a page range as unsummarized.
|
||||
*
|
||||
* Index must be locked in ShareUpdateExclusiveLock mode.
|
||||
*
|
||||
* Return FALSE if caller should retry.
|
||||
*/
|
||||
bool
|
||||
brinRevmapDesummarizeRange(Relation idxrel, BlockNumber heapBlk)
|
||||
{
|
||||
BrinRevmap *revmap;
|
||||
BlockNumber pagesPerRange;
|
||||
RevmapContents *contents;
|
||||
ItemPointerData *iptr;
|
||||
ItemPointerData invalidIptr;
|
||||
BlockNumber revmapBlk;
|
||||
Buffer revmapBuf;
|
||||
Buffer regBuf;
|
||||
Page revmapPg;
|
||||
Page regPg;
|
||||
OffsetNumber revmapOffset;
|
||||
OffsetNumber regOffset;
|
||||
ItemId lp;
|
||||
BrinTuple *tup;
|
||||
|
||||
revmap = brinRevmapInitialize(idxrel, &pagesPerRange, NULL);
|
||||
|
||||
revmapBlk = revmap_get_blkno(revmap, heapBlk);
|
||||
if (!BlockNumberIsValid(revmapBlk))
|
||||
{
|
||||
/* revmap page doesn't exist: range not summarized, we're done */
|
||||
brinRevmapTerminate(revmap);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Lock the revmap page, obtain the index tuple pointer from it */
|
||||
revmapBuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
|
||||
revmapPg = BufferGetPage(revmapBuf);
|
||||
revmapOffset = HEAPBLK_TO_REVMAP_INDEX(revmap->rm_pagesPerRange, heapBlk);
|
||||
|
||||
contents = (RevmapContents *) PageGetContents(revmapPg);
|
||||
iptr = contents->rm_tids;
|
||||
iptr += revmapOffset;
|
||||
|
||||
if (!ItemPointerIsValid(iptr))
|
||||
{
|
||||
/* no index tuple: range not summarized, we're done */
|
||||
LockBuffer(revmapBuf, BUFFER_LOCK_UNLOCK);
|
||||
brinRevmapTerminate(revmap);
|
||||
return true;
|
||||
}
|
||||
|
||||
regBuf = ReadBuffer(idxrel, ItemPointerGetBlockNumber(iptr));
|
||||
LockBuffer(regBuf, BUFFER_LOCK_EXCLUSIVE);
|
||||
regPg = BufferGetPage(regBuf);
|
||||
|
||||
/* if this is no longer a regular page, tell caller to start over */
|
||||
if (!BRIN_IS_REGULAR_PAGE(regPg))
|
||||
{
|
||||
LockBuffer(revmapBuf, BUFFER_LOCK_UNLOCK);
|
||||
LockBuffer(regBuf, BUFFER_LOCK_UNLOCK);
|
||||
brinRevmapTerminate(revmap);
|
||||
return false;
|
||||
}
|
||||
|
||||
regOffset = ItemPointerGetOffsetNumber(iptr);
|
||||
if (regOffset > PageGetMaxOffsetNumber(regPg))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INDEX_CORRUPTED),
|
||||
errmsg("corrupted BRIN index: inconsistent range map")));
|
||||
|
||||
lp = PageGetItemId(regPg, regOffset);
|
||||
if (!ItemIdIsUsed(lp))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INDEX_CORRUPTED),
|
||||
errmsg("corrupted BRIN index: inconsistent range map")));
|
||||
tup = (BrinTuple *) PageGetItem(regPg, lp);
|
||||
/* XXX apply sanity checks? Might as well delete a bogus tuple ... */
|
||||
|
||||
/*
|
||||
* We're only removing data, not reading it, so there's no need to
|
||||
* TestForOldSnapshot here.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Because of SUE lock, this function shouldn't run concurrently with
|
||||
* summarization. Placeholder tuples can only exist as leftovers from
|
||||
* crashed summarization, so if we detect any, we complain but proceed.
|
||||
*/
|
||||
if (BrinTupleIsPlaceholder(tup))
|
||||
ereport(WARNING,
|
||||
(errmsg("leftover placeholder tuple detected in BRIN index \"%s\", deleting",
|
||||
RelationGetRelationName(idxrel))));
|
||||
|
||||
START_CRIT_SECTION();
|
||||
|
||||
ItemPointerSetInvalid(&invalidIptr);
|
||||
brinSetHeapBlockItemptr(revmapBuf, revmap->rm_pagesPerRange, heapBlk,
|
||||
invalidIptr);
|
||||
PageIndexTupleDeleteNoCompact(regPg, regOffset);
|
||||
/* XXX record free space in FSM? */
|
||||
|
||||
MarkBufferDirty(regBuf);
|
||||
MarkBufferDirty(revmapBuf);
|
||||
|
||||
if (RelationNeedsWAL(idxrel))
|
||||
{
|
||||
xl_brin_desummarize xlrec;
|
||||
XLogRecPtr recptr;
|
||||
|
||||
xlrec.heapBlk = heapBlk;
|
||||
xlrec.regOffset = regOffset;
|
||||
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) &xlrec, SizeOfBrinDesummarize);
|
||||
XLogRegisterBuffer(0, revmapBuf, 0);
|
||||
XLogRegisterBuffer(1, regBuf, REGBUF_STANDARD);
|
||||
recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_DESUMMARIZE);
|
||||
PageSetLSN(revmapPg, recptr);
|
||||
PageSetLSN(regPg, recptr);
|
||||
}
|
||||
|
||||
END_CRIT_SECTION();
|
||||
|
||||
UnlockReleaseBuffer(regBuf);
|
||||
LockBuffer(revmapBuf, BUFFER_LOCK_UNLOCK);
|
||||
brinRevmapTerminate(revmap);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Given a heap block number, find the corresponding physical revmap block
|
||||
* number and return it. If the revmap page hasn't been allocated yet, return
|
||||
|
@ -254,6 +254,46 @@ brin_xlog_revmap_extend(XLogReaderState *record)
|
||||
UnlockReleaseBuffer(metabuf);
|
||||
}
|
||||
|
||||
static void
|
||||
brin_xlog_desummarize_page(XLogReaderState *record)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
xl_brin_desummarize *xlrec;
|
||||
Buffer buffer;
|
||||
XLogRedoAction action;
|
||||
|
||||
xlrec = (xl_brin_desummarize *) XLogRecGetData(record);
|
||||
|
||||
/* Update the revmap */
|
||||
action = XLogReadBufferForRedo(record, 0, &buffer);
|
||||
if (action == BLK_NEEDS_REDO)
|
||||
{
|
||||
ItemPointerData iptr;
|
||||
|
||||
ItemPointerSetInvalid(&iptr);
|
||||
brinSetHeapBlockItemptr(buffer, xlrec->pagesPerRange, xlrec->heapBlk, iptr);
|
||||
|
||||
PageSetLSN(BufferGetPage(buffer), lsn);
|
||||
MarkBufferDirty(buffer);
|
||||
}
|
||||
if (BufferIsValid(buffer))
|
||||
UnlockReleaseBuffer(buffer);
|
||||
|
||||
/* remove the leftover entry from the regular page */
|
||||
action = XLogReadBufferForRedo(record, 1, &buffer);
|
||||
if (action == BLK_NEEDS_REDO)
|
||||
{
|
||||
Page regPg = BufferGetPage(buffer);
|
||||
|
||||
PageIndexTupleDeleteNoCompact(regPg, xlrec->regOffset);
|
||||
|
||||
PageSetLSN(regPg, lsn);
|
||||
MarkBufferDirty(buffer);
|
||||
}
|
||||
if (BufferIsValid(buffer))
|
||||
UnlockReleaseBuffer(buffer);
|
||||
}
|
||||
|
||||
void
|
||||
brin_redo(XLogReaderState *record)
|
||||
{
|
||||
@ -276,6 +316,9 @@ brin_redo(XLogReaderState *record)
|
||||
case XLOG_BRIN_REVMAP_EXTEND:
|
||||
brin_xlog_revmap_extend(record);
|
||||
break;
|
||||
case XLOG_BRIN_DESUMMARIZE:
|
||||
brin_xlog_desummarize_page(record);
|
||||
break;
|
||||
default:
|
||||
elog(PANIC, "brin_redo: unknown op code %u", info);
|
||||
}
|
||||
|
@ -61,6 +61,13 @@ brin_desc(StringInfo buf, XLogReaderState *record)
|
||||
|
||||
appendStringInfo(buf, "targetBlk %u", xlrec->targetBlk);
|
||||
}
|
||||
else if (info == XLOG_BRIN_DESUMMARIZE)
|
||||
{
|
||||
xl_brin_desummarize *xlrec = (xl_brin_desummarize *) rec;
|
||||
|
||||
appendStringInfo(buf, "pagesPerRange %u, heapBlk %u, page offset %u",
|
||||
xlrec->pagesPerRange, xlrec->heapBlk, xlrec->regOffset);
|
||||
}
|
||||
}
|
||||
|
||||
const char *
|
||||
@ -91,6 +98,9 @@ brin_identify(uint8 info)
|
||||
case XLOG_BRIN_REVMAP_EXTEND:
|
||||
id = "REVMAP_EXTEND";
|
||||
break;
|
||||
case XLOG_BRIN_DESUMMARIZE:
|
||||
id = "DESUMMARIZE";
|
||||
break;
|
||||
}
|
||||
|
||||
return id;
|
||||
|
@ -36,5 +36,6 @@ extern void brinSetHeapBlockItemptr(Buffer rmbuf, BlockNumber pagesPerRange,
|
||||
extern BrinTuple *brinGetTupleForHeapBlock(BrinRevmap *revmap,
|
||||
BlockNumber heapBlk, Buffer *buf, OffsetNumber *off,
|
||||
Size *size, int mode, Snapshot snapshot);
|
||||
extern bool brinRevmapDesummarizeRange(Relation idxrel, BlockNumber heapBlk);
|
||||
|
||||
#endif /* BRIN_REVMAP_H */
|
||||
|
@ -33,7 +33,7 @@
|
||||
#define XLOG_BRIN_UPDATE 0x20
|
||||
#define XLOG_BRIN_SAMEPAGE_UPDATE 0x30
|
||||
#define XLOG_BRIN_REVMAP_EXTEND 0x40
|
||||
#define XLOG_BRIN_REVMAP_VACUUM 0x50
|
||||
#define XLOG_BRIN_DESUMMARIZE 0x50
|
||||
|
||||
#define XLOG_BRIN_OPMASK 0x70
|
||||
/*
|
||||
@ -124,6 +124,24 @@ typedef struct xl_brin_revmap_extend
|
||||
#define SizeOfBrinRevmapExtend (offsetof(xl_brin_revmap_extend, targetBlk) + \
|
||||
sizeof(BlockNumber))
|
||||
|
||||
/*
|
||||
* This is what we need to know about a range de-summarization
|
||||
*
|
||||
* Backup block 0: revmap page
|
||||
* Backup block 1: regular page
|
||||
*/
|
||||
typedef struct xl_brin_desummarize
|
||||
{
|
||||
BlockNumber pagesPerRange;
|
||||
/* page number location to set to invalid */
|
||||
OffsetNumber heapBlk;
|
||||
/* offset of item to delete in regular index page */
|
||||
OffsetNumber regOffset;
|
||||
} xl_brin_desummarize;
|
||||
|
||||
#define SizeOfBrinDesummarize (offsetof(xl_brin_desummarize, regOffset) + \
|
||||
sizeof(OffsetNumber))
|
||||
|
||||
|
||||
extern void brin_redo(XLogReaderState *record);
|
||||
extern void brin_desc(StringInfo buf, XLogReaderState *record);
|
||||
|
@ -53,6 +53,6 @@
|
||||
*/
|
||||
|
||||
/* yyyymmddN */
|
||||
#define CATALOG_VERSION_NO 201704011
|
||||
#define CATALOG_VERSION_NO 201704012
|
||||
|
||||
#endif
|
||||
|
@ -566,6 +566,8 @@ DATA(insert OID = 3952 ( brin_summarize_new_values PGNSP PGUID 12 1 0 0 0 f f f
|
||||
DESCR("brin: standalone scan new table pages");
|
||||
DATA(insert OID = 3999 ( brin_summarize_range PGNSP PGUID 12 1 0 0 0 f f f f t f v s 2 0 23 "2205 20" _null_ _null_ _null_ _null_ _null_ brin_summarize_range _null_ _null_ _null_ ));
|
||||
DESCR("brin: standalone scan new table pages");
|
||||
DATA(insert OID = 4014 ( brin_desummarize_range PGNSP PGUID 12 1 0 0 0 f f f f t f v s 2 0 2278 "2205 20" _null_ _null_ _null_ _null_ _null_ brin_desummarize_range _null_ _null_ _null_ ));
|
||||
DESCR("brin: desummarize page range");
|
||||
|
||||
DATA(insert OID = 338 ( amvalidate PGNSP PGUID 12 1 0 0 0 f f f f t f v s 1 0 16 "26" _null_ _null_ _null_ _null_ _null_ amvalidate _null_ _null_ _null_ ));
|
||||
DESCR("validate an operator class");
|
||||
|
@ -392,6 +392,12 @@ INSERT INTO brintest SELECT
|
||||
format('%s/%s%s', odd, even, tenthous)::pg_lsn,
|
||||
box(point(odd, even), point(thousand, twothousand))
|
||||
FROM tenk1 ORDER BY unique2 LIMIT 5 OFFSET 5;
|
||||
SELECT brin_desummarize_range('brinidx', 0);
|
||||
brin_desummarize_range
|
||||
------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
VACUUM brintest; -- force a summarization cycle in brinidx
|
||||
UPDATE brintest SET int8col = int8col * int4col;
|
||||
UPDATE brintest SET textcol = '' WHERE textcol IS NOT NULL;
|
||||
@ -406,6 +412,27 @@ SELECT brin_summarize_new_values('brinidx'); -- ok, no change expected
|
||||
0
|
||||
(1 row)
|
||||
|
||||
-- Tests for brin_desummarize_range
|
||||
SELECT brin_desummarize_range('brinidx', -1); -- error, invalid range
|
||||
ERROR: block number out of range: -1
|
||||
SELECT brin_desummarize_range('brinidx', 0);
|
||||
brin_desummarize_range
|
||||
------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT brin_desummarize_range('brinidx', 0);
|
||||
brin_desummarize_range
|
||||
------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT brin_desummarize_range('brinidx', 100000000);
|
||||
brin_desummarize_range
|
||||
------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
-- Test brin_summarize_range
|
||||
CREATE TABLE brin_summarize (
|
||||
value int
|
||||
|
@ -400,6 +400,7 @@ INSERT INTO brintest SELECT
|
||||
box(point(odd, even), point(thousand, twothousand))
|
||||
FROM tenk1 ORDER BY unique2 LIMIT 5 OFFSET 5;
|
||||
|
||||
SELECT brin_desummarize_range('brinidx', 0);
|
||||
VACUUM brintest; -- force a summarization cycle in brinidx
|
||||
|
||||
UPDATE brintest SET int8col = int8col * int4col;
|
||||
@ -410,6 +411,12 @@ SELECT brin_summarize_new_values('brintest'); -- error, not an index
|
||||
SELECT brin_summarize_new_values('tenk1_unique1'); -- error, not a BRIN index
|
||||
SELECT brin_summarize_new_values('brinidx'); -- ok, no change expected
|
||||
|
||||
-- Tests for brin_desummarize_range
|
||||
SELECT brin_desummarize_range('brinidx', -1); -- error, invalid range
|
||||
SELECT brin_desummarize_range('brinidx', 0);
|
||||
SELECT brin_desummarize_range('brinidx', 0);
|
||||
SELECT brin_desummarize_range('brinidx', 100000000);
|
||||
|
||||
-- Test brin_summarize_range
|
||||
CREATE TABLE brin_summarize (
|
||||
value int
|
||||
|
Loading…
x
Reference in New Issue
Block a user