mirror of
https://github.com/postgres/postgres.git
synced 2025-06-01 00:01:20 -04:00
pgstat: Allow checksum errors to be reported in critical sections
For AIO we execute completion callbacks in critical sections (to ensure that AIO can in the future be used for WAL, which in turn requires that we can call completion callbacks in critical sections, to get the resources for WAL IO). To report checksum errors, a backend now has to call pgstat_prepare_report_checksum_failure() before entering a critical section, which guarantees that the relevant pgstats entry is in shared memory, the relevant DSM segment is mapped into the backend's memory, and the address is known via a PgStat_EntryRef. Reviewed-by: Noah Misch <noah@leadboat.com> Discussion: https://postgr.es/m/wkjj4p2rmkevutkwc6tewoovdqznj6c6nvjmvii4oo5wmbh5sr@retq7d6uqs4j
This commit is contained in:
parent
4244cf6876
commit
b96d3c3897
@ -1817,6 +1817,7 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
|
||||
checksum_failures,
|
||||
readfilename, checksum_failures)));
|
||||
|
||||
pgstat_prepare_report_checksum_failure(dboid);
|
||||
pgstat_report_checksum_failures_in_db(dboid, checksum_failures);
|
||||
}
|
||||
|
||||
|
@ -524,6 +524,7 @@ RelationCopyStorage(SMgrRelation src, SMgrRelation dst,
|
||||
{
|
||||
RelFileLocatorBackend rloc = src->smgr_rlocator;
|
||||
|
||||
pgstat_prepare_report_checksum_failure(rloc.locator.dbOid);
|
||||
pgstat_report_checksum_failures_in_db(rloc.locator.dbOid, 1);
|
||||
}
|
||||
|
||||
|
@ -1590,6 +1590,7 @@ WaitReadBuffers(ReadBuffersOperation *operation)
|
||||
{
|
||||
RelFileLocatorBackend rloc = operation->smgr->smgr_rlocator;
|
||||
|
||||
pgstat_prepare_report_checksum_failure(rloc.locator.dbOid);
|
||||
pgstat_report_checksum_failures_in_db(rloc.locator.dbOid, 1);
|
||||
}
|
||||
|
||||
|
@ -133,8 +133,34 @@ pgstat_report_deadlock(void)
|
||||
dbent->deadlocks++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allow this backend to later report checksum failures for dboid, even if in
|
||||
* a critical section at the time of the report.
|
||||
*
|
||||
* Without this function having been called first, the backend might need to
|
||||
* allocate an EntryRef or might need to map in DSM segments. Neither should
|
||||
* happen in a critical section.
|
||||
*/
|
||||
void
|
||||
pgstat_prepare_report_checksum_failure(Oid dboid)
|
||||
{
|
||||
/* This preparation step itself must not run inside a critical section. */
Assert(!CritSectionCount);
|
||||
|
||||
/*
|
||||
* Just need to ensure this backend has an entry ref for the database.
|
||||
* That will allow us to report checksum failures without e.g. needing to
|
||||
* map in DSM segments.
|
||||
*/
|
||||
/*
 * The returned entry ref is intentionally discarded; only the side effect
 * of having looked it up matters here. NOTE(review): the fourth argument
 * (true) is presumably the "create" flag -- confirm against the declaration
 * of pgstat_get_entry_ref().
 */
pgstat_get_entry_ref(PGSTAT_KIND_DATABASE, dboid, InvalidOid,
|
||||
true, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Report one or more checksum failures.
|
||||
*
|
||||
* To be allowed to report checksum failures in critical sections, we require
|
||||
* pgstat_prepare_report_checksum_failure() to have been called before this
|
||||
* function is called.
|
||||
*/
|
||||
void
|
||||
pgstat_report_checksum_failures_in_db(Oid dboid, int failurecount)
|
||||
@ -147,10 +173,29 @@ pgstat_report_checksum_failures_in_db(Oid dboid, int failurecount)
|
||||
|
||||
/*
|
||||
* Update the shared stats directly - checksum failures should never be
|
||||
* common enough for that to be a problem.
|
||||
* common enough for that to be a problem. Note that we pass create=false
|
||||
* here, as we want to be sure to not require memory allocations, so this
|
||||
* can be called in critical sections.
|
||||
*/
|
||||
entry_ref =
|
||||
pgstat_get_entry_ref_locked(PGSTAT_KIND_DATABASE, dboid, InvalidOid, false);
|
||||
entry_ref = pgstat_get_entry_ref(PGSTAT_KIND_DATABASE, dboid, InvalidOid,
|
||||
false, NULL);
|
||||
|
||||
/*
|
||||
* Should always have been created by
|
||||
* pgstat_prepare_report_checksum_failure().
|
||||
*
|
||||
* When not using assertions, we don't want to crash should something have
|
||||
* gone wrong, so just return.
|
||||
*/
|
||||
Assert(entry_ref);
|
||||
if (!entry_ref)
|
||||
{
|
||||
elog(WARNING, "could not report %d conflicts for DB %u",
|
||||
failurecount, dboid);
|
||||
return;
|
||||
}
|
||||
|
||||
pgstat_lock_entry(entry_ref, false);
|
||||
|
||||
sharedent = (PgStatShared_Database *) entry_ref->shared_stats;
|
||||
sharedent->stats.checksum_failures += failurecount;
|
||||
|
@ -611,6 +611,7 @@ extern void pgstat_drop_database(Oid databaseid);
|
||||
extern void pgstat_report_autovac(Oid dboid);
|
||||
extern void pgstat_report_recovery_conflict(int reason);
|
||||
extern void pgstat_report_deadlock(void);
|
||||
extern void pgstat_prepare_report_checksum_failure(Oid dboid);
|
||||
extern void pgstat_report_checksum_failures_in_db(Oid dboid, int failurecount);
|
||||
extern void pgstat_report_connect(Oid dboid);
|
||||
extern void pgstat_update_parallel_workers_stats(PgStat_Counter workers_to_launch,
|
||||
|
Loading…
x
Reference in New Issue
Block a user