mirror of
https://github.com/postgres/postgres.git
synced 2025-05-30 00:02:11 -04:00
The primary bottlenecks for relation extension are: 1) The extension lock is held while acquiring a victim buffer for the new page. Acquiring a victim buffer can require writing out the old page contents including possibly needing to flush WAL. 2) When extending via ReadBuffer() et al, we write a zero page during the extension, and then later write out the actual page contents. This can nearly double the write rate. 3) The existing bulk relation extension infrastructure in hio.c just amortized the cost of acquiring the relation extension lock, but none of the other costs. Unfortunately 1) cannot currently be addressed in a central manner as the callers to ReadBuffer() need to acquire the extension lock. To address that, this commit moves the responsibility for acquiring the extension lock into bufmgr.c functions. That allows acquiring the relation extension lock for just the required time. This will also allow us to improve relation extension further, without changing callers. The reason we write all-zeroes pages during relation extension is that we hope to get ENOSPC errors earlier that way (largely works, except for CoW filesystems). It is easier to handle out-of-space errors gracefully if the page doesn't yet contain actual tuples. This commit addresses 2), by using the recently introduced smgrzeroextend(), which extends the relation, without dirtying the kernel page cache for all the extended pages. To address 3), this commit introduces a function to extend a relation by multiple blocks at a time. There are three new exposed functions: ExtendBufferedRel() for extending the relation by a single block, ExtendBufferedRelBy() to extend a relation by multiple blocks at once, and ExtendBufferedRelTo() for extending a relation up to a certain size.
To avoid duplicating code between ReadBuffer(P_NEW) and the new functions, ReadBuffer(P_NEW) now implements relation extension with ExtendBufferedRel(), using a flag to tell ExtendBufferedRel() that the relation lock is already held. Note that this commit does not yet lead to a meaningful performance or scalability improvement - for that uses of ReadBuffer(P_NEW) will need to be converted to ExtendBuffered*(), which will be done in subsequent commits. Reviewed-by: Heikki Linnakangas <hlinnaka@iki.fi> Reviewed-by: Melanie Plageman <melanieplageman@gmail.com> Discussion: https://postgr.es/m/20221029025420.eplyow6k7tgu6he3@awork3.anarazel.de
95 lines
3.4 KiB
D
95 lines
3.4 KiB
D
/* ----------
|
|
* DTrace probes for PostgreSQL backend
|
|
*
|
|
* Copyright (c) 2006-2023, PostgreSQL Global Development Group
|
|
*
|
|
* src/backend/utils/probes.d
|
|
* ----------
|
|
*/
|
|
|
|
|
|
/*
|
|
* Typedefs used in PostgreSQL probes.
|
|
*
|
|
* NOTE: Do not use system-provided typedefs (e.g. uintptr_t, uint32_t, etc)
|
|
* in probe definitions, as they cause compilation errors on macOS.
|
|
*/
|
|
/*
 * Preprocessor aliases that let the probe signatures below use the backend's
 * type names while expanding to plain built-in C types (see the NOTE above
 * about avoiding system-provided typedefs).
 */
#define LocalTransactionId unsigned int
|
|
#define LWLockMode int
|
|
#define LOCKMODE int
|
|
#define BlockNumber unsigned int
|
|
#define Oid unsigned int
|
|
#define ForkNumber int
|
|
#define bool unsigned char
|
|
|
|
/*
 * Provider definition: declares every probe exposed under the "postgresql"
 * provider together with its argument types.  Probe names use "__" as the
 * word separator.
 */
provider postgresql {
|
|
|
|
/* Transaction start/commit/abort; each carries a LocalTransactionId. */
probe transaction__start(LocalTransactionId);
|
|
probe transaction__commit(LocalTransactionId);
|
|
probe transaction__abort(LocalTransactionId);
|
|
|
|
/*
 * LWLock probes.  Every probe takes a string argument; all except
 * lwlock__release additionally take an LWLockMode.
 * NOTE(review): the string is presumably the lock/tranche name -- confirm
 * against the call sites.
 */
probe lwlock__acquire(const char *, LWLockMode);
|
|
probe lwlock__release(const char *);
|
|
probe lwlock__wait__start(const char *, LWLockMode);
|
|
probe lwlock__wait__done(const char *, LWLockMode);
|
|
probe lwlock__condacquire(const char *, LWLockMode);
|
|
probe lwlock__condacquire__fail(const char *, LWLockMode);
|
|
probe lwlock__acquire__or__wait(const char *, LWLockMode);
|
|
probe lwlock__acquire__or__wait__fail(const char *, LWLockMode);
|
|
|
|
/*
 * Lock wait start/done: five unsigned int arguments followed by a LOCKMODE.
 * NOTE(review): the meaning of the five unsigned fields is not visible here
 * (presumably lock tag fields) -- confirm against the call sites.
 */
probe lock__wait__start(unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, LOCKMODE);
|
|
probe lock__wait__done(unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, LOCKMODE);
|
|
|
|
/*
 * Query processing probes.  parse, rewrite, query__start/done and
 * statement__status carry a string; plan and execute take no arguments.
 */
probe query__parse__start(const char *);
|
|
probe query__parse__done(const char *);
|
|
probe query__rewrite__start(const char *);
|
|
probe query__rewrite__done(const char *);
|
|
probe query__plan__start();
|
|
probe query__plan__done();
|
|
probe query__execute__start();
|
|
probe query__execute__done();
|
|
probe query__start(const char *);
|
|
probe query__done(const char *);
|
|
probe statement__status(const char *);
|
|
|
|
/* Sort start/done probes. */
probe sort__start(int, bool, int, int, bool, int);
|
|
probe sort__done(bool, long);
|
|
|
|
/*
 * Buffer read/flush/extend probes.  read and flush identify a block by
 * ForkNumber, BlockNumber and three Oids.  The extend probes take no
 * BlockNumber up front; buffer__extend__done appends one as its last
 * argument.
 * NOTE(review): the trailing int / unsigned int arguments are not explained
 * here -- confirm their meaning against the call sites.
 */
probe buffer__read__start(ForkNumber, BlockNumber, Oid, Oid, Oid, int);
|
|
probe buffer__read__done(ForkNumber, BlockNumber, Oid, Oid, Oid, int, bool);
|
|
probe buffer__flush__start(ForkNumber, BlockNumber, Oid, Oid, Oid);
|
|
probe buffer__flush__done(ForkNumber, BlockNumber, Oid, Oid, Oid);
|
|
probe buffer__extend__start(ForkNumber, Oid, Oid, Oid, int, unsigned int);
|
|
probe buffer__extend__done(ForkNumber, Oid, Oid, Oid, int, unsigned int, BlockNumber);
|
|
|
|
/* Buffer checkpoint and buffer sync probes. */
probe buffer__checkpoint__start(int);
|
|
probe buffer__checkpoint__sync__start();
|
|
probe buffer__checkpoint__done();
|
|
probe buffer__sync__start(int, int);
|
|
probe buffer__sync__written(int);
|
|
probe buffer__sync__done(int, int, int);
|
|
|
|
/* Deadlock detection probe; takes no arguments. */
probe deadlock__found();
|
|
|
|
/*
 * Checkpoint probes: the overall checkpoint plus clog, subtrans, multixact
 * and twophase start/done pairs.
 */
probe checkpoint__start(int);
|
|
probe checkpoint__done(int, int, int, int, int);
|
|
probe clog__checkpoint__start(bool);
|
|
probe clog__checkpoint__done(bool);
|
|
probe subtrans__checkpoint__start(bool);
|
|
probe subtrans__checkpoint__done(bool);
|
|
probe multixact__checkpoint__start(bool);
|
|
probe multixact__checkpoint__done(bool);
|
|
probe twophase__checkpoint__start();
|
|
probe twophase__checkpoint__done();
|
|
|
|
/*
 * smgr md read/write probes.  The start probes share the signature of
 * buffer__read__start; the done probes append two further int arguments.
 */
probe smgr__md__read__start(ForkNumber, BlockNumber, Oid, Oid, Oid, int);
|
|
probe smgr__md__read__done(ForkNumber, BlockNumber, Oid, Oid, Oid, int, int, int);
|
|
probe smgr__md__write__start(ForkNumber, BlockNumber, Oid, Oid, Oid, int);
|
|
probe smgr__md__write__done(ForkNumber, BlockNumber, Oid, Oid, Oid, int, int, int);
|
|
|
|
/* WAL probes: insert, switch, and dirty WAL buffer write start/done. */
probe wal__insert(unsigned char, unsigned char);
|
|
probe wal__switch();
|
|
probe wal__buffer__write__dirty__start();
|
|
probe wal__buffer__write__dirty__done();
|
|
};
|