Heikki Linnakangas 9b38d46d9f Make group commit more effective.
When a backend needs to flush the WAL, and someone else is already flushing
the WAL, wait until it releases the WALInsertLock and check if we still need
to do the flush or if the other backend already did the work for us, before
acquiring WALInsertLock. This helps group commit, because when the WAL flush
finishes, all the backends that were waiting for it can be woken up in one
go, and the can all concurrently observe that they're done, rather than
waking them up one by one in a cascading fashion.

This is based on a new LWLock function, LWLockWaitUntilFree(), which has
peculiar semantics. If the lock is immediately free, it grabs the lock and
returns true. If it's not free, it waits until it is released, but then
returns false without grabbing the lock. This is used in XLogFlush(), so
that when the lock is acquired, the backend flushes the WAL, but if it's
not, the backend first checks the current flush location before retrying.

Original patch and benchmarking by Peter Geoghegan and Simon Riggs, although
this patch as committed ended up being very different from that.
2012-01-30 16:53:48 +02:00

262 lines
9.5 KiB
C

/*-------------------------------------------------------------------------
*
* proc.h
* per-process shared memory data structures
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/proc.h
*
*-------------------------------------------------------------------------
*/
#ifndef _PROC_H_
#define _PROC_H_
#include "access/xlog.h"
#include "datatype/timestamp.h"
#include "storage/latch.h"
#include "storage/lock.h"
#include "storage/pg_sema.h"
/*
* Each backend advertises up to PGPROC_MAX_CACHED_SUBXIDS TransactionIds
* for non-aborted subtransactions of its current top transaction. These
* have to be treated as running XIDs by other backends.
*
* We also keep track of whether the cache overflowed (ie, the transaction has
* generated at least one subtransaction that didn't fit in the cache).
* If none of the caches have overflowed, we can assume that an XID that's not
* listed anywhere in the PGPROC array is not a running transaction. Else we
* have to look at pg_subtrans.
*/
#define PGPROC_MAX_CACHED_SUBXIDS 64 /* XXX guessed-at value */
struct XidCache
{
TransactionId xids[PGPROC_MAX_CACHED_SUBXIDS];
};
/* Flags for PGPROC->vacuumFlags */
#define PROC_IS_AUTOVACUUM 0x01 /* is it an autovac worker? */
#define PROC_IN_VACUUM 0x02 /* currently running lazy vacuum */
#define PROC_IN_ANALYZE 0x04 /* currently running analyze */
#define PROC_VACUUM_FOR_WRAPAROUND 0x08 /* set by autovac only */
/* flags reset at EOXact */
#define PROC_VACUUM_STATE_MASK (0x0E)
/*
* We allow a small number of "weak" relation locks (AccesShareLock,
* RowShareLock, RowExclusiveLock) to be recorded in the PGPROC structure
* rather than the main lock table. This eases contention on the lock
* manager LWLocks. See storage/lmgr/README for additional details.
*/
#define FP_LOCK_SLOTS_PER_BACKEND 16
/*
* Each backend has a PGPROC struct in shared memory. There is also a list of
* currently-unused PGPROC structs that will be reallocated to new backends.
*
* links: list link for any list the PGPROC is in. When waiting for a lock,
* the PGPROC is linked into that lock's waitProcs queue. A recycled PGPROC
* is linked into ProcGlobal's freeProcs list.
*
* Note: twophase.c also sets up a dummy PGPROC struct for each currently
* prepared transaction. These PGPROCs appear in the ProcArray data structure
* so that the prepared transactions appear to be still running and are
* correctly shown as holding locks. A prepared transaction PGPROC can be
* distinguished from a real one at need by the fact that it has pid == 0.
* The semaphore and lock-activity fields in a prepared-xact PGPROC are unused,
* but its myProcLocks[] lists are valid.
*/
struct PGPROC
{
/* proc->links MUST BE FIRST IN STRUCT (see ProcSleep,ProcWakeup,etc) */
SHM_QUEUE links; /* list link if process is in a list */
PGSemaphoreData sem; /* ONE semaphore to sleep on */
int waitStatus; /* STATUS_WAITING, STATUS_OK or STATUS_ERROR */
Latch procLatch; /* generic latch for process */
LocalTransactionId lxid; /* local id of top-level transaction currently
* being executed by this proc, if running;
* else InvalidLocalTransactionId */
int pid; /* Backend's process ID; 0 if prepared xact */
int pgprocno;
/* These fields are zero while a backend is still starting up: */
BackendId backendId; /* This backend's backend ID (if assigned) */
Oid databaseId; /* OID of database this backend is using */
Oid roleId; /* OID of role using this backend */
/*
* While in hot standby mode, shows that a conflict signal has been sent
* for the current transaction. Set/cleared while holding ProcArrayLock,
* though not required. Accessed without lock, if needed.
*/
bool recoveryConflictPending;
/* Info about LWLock the process is currently waiting for, if any. */
bool lwWaiting; /* true if waiting for an LW lock */
uint8 lwWaitMode; /* lwlock mode being waited for */
struct PGPROC *lwWaitLink; /* next waiter for same LW lock */
/* Info about lock the process is currently waiting for, if any. */
/* waitLock and waitProcLock are NULL if not currently waiting. */
LOCK *waitLock; /* Lock object we're sleeping on ... */
PROCLOCK *waitProcLock; /* Per-holder info for awaited lock */
LOCKMODE waitLockMode; /* type of lock we're waiting for */
LOCKMASK heldLocks; /* bitmask for lock types already held on this
* lock object by this backend */
/*
* Info to allow us to wait for synchronous replication, if needed.
* waitLSN is InvalidXLogRecPtr if not waiting; set only by user backend.
* syncRepState must not be touched except by owning process or WALSender.
* syncRepLinks used only while holding SyncRepLock.
*/
XLogRecPtr waitLSN; /* waiting for this LSN or higher */
int syncRepState; /* wait state for sync rep */
SHM_QUEUE syncRepLinks; /* list link if process is in syncrep queue */
/*
* All PROCLOCK objects for locks held or awaited by this backend are
* linked into one of these lists, according to the partition number of
* their lock.
*/
SHM_QUEUE myProcLocks[NUM_LOCK_PARTITIONS];
struct XidCache subxids; /* cache for subtransaction XIDs */
/* Per-backend LWLock. Protects fields below. */
LWLockId backendLock; /* protects the fields below */
/* Lock manager data, recording fast-path locks taken by this backend. */
uint64 fpLockBits; /* lock modes held for each fast-path slot */
Oid fpRelId[FP_LOCK_SLOTS_PER_BACKEND]; /* slots for rel oids */
bool fpVXIDLock; /* are we holding a fast-path VXID lock? */
LocalTransactionId fpLocalTransactionId; /* lxid for fast-path VXID lock */
};
/* NOTE: "typedef struct PGPROC PGPROC" appears in storage/lock.h. */
extern PGDLLIMPORT PGPROC *MyProc;
extern PGDLLIMPORT struct PGXACT *MyPgXact;
/*
* Prior to PostgreSQL 9.2, the fields below were stored as part of the
* PGPROC. However, benchmarking revealed that packing these particular
* members into a separate array as tightly as possible sped up GetSnapshotData
* considerably on systems with many CPU cores, by reducing the number of
* cache lines needing to be fetched. Thus, think very carefully before adding
* anything else here.
*/
typedef struct PGXACT
{
TransactionId xid; /* id of top-level transaction currently being
* executed by this proc, if running and XID
* is assigned; else InvalidTransactionId */
TransactionId xmin; /* minimal running XID as it was when we were
* starting our xact, excluding LAZY VACUUM:
* vacuum must not remove tuples deleted by
* xid >= xmin ! */
uint8 vacuumFlags; /* vacuum-related flags, see above */
bool overflowed;
bool inCommit; /* true if within commit critical section */
uint8 nxids;
} PGXACT;
/*
* There is one ProcGlobal struct for the whole database cluster.
*/
typedef struct PROC_HDR
{
/* Array of PGPROC structures (not including dummies for prepared txns) */
PGPROC *allProcs;
/* Array of PGXACT structures (not including dummies for prepared txns */
PGXACT *allPgXact;
/* Length of allProcs array */
uint32 allProcCount;
/* Head of list of free PGPROC structures */
PGPROC *freeProcs;
/* Head of list of autovacuum's free PGPROC structures */
PGPROC *autovacFreeProcs;
/* BGWriter process latch */
Latch *bgwriterLatch;
/* Current shared estimate of appropriate spins_per_delay value */
int spins_per_delay;
/* The proc of the Startup process, since not in ProcArray */
PGPROC *startupProc;
int startupProcPid;
/* Buffer id of the buffer that Startup process waits for pin on, or -1 */
int startupBufferPinWaitBufId;
} PROC_HDR;
extern PROC_HDR *ProcGlobal;
extern PGPROC *PreparedXactProcs;
/*
* We set aside some extra PGPROC structures for auxiliary processes,
* ie things that aren't full-fledged backends but need shmem access.
*
* Background writer, checkpointer and WAL writer run during normal operation.
* Startup process and WAL receiver also consume 2 slots, but WAL writer is
* launched only after startup has exited, so we only need 4 slots.
*/
#define NUM_AUXILIARY_PROCS 4
/* configurable options */
extern int DeadlockTimeout;
extern int StatementTimeout;
extern bool log_lock_waits;
extern volatile bool cancel_from_timeout;
/*
* Function Prototypes
*/
extern int ProcGlobalSemas(void);
extern Size ProcGlobalShmemSize(void);
extern void InitProcGlobal(void);
extern void InitProcess(void);
extern void InitProcessPhase2(void);
extern void InitAuxiliaryProcess(void);
extern void PublishStartupProcessInformation(void);
extern void SetStartupBufferPinWaitBufId(int bufid);
extern int GetStartupBufferPinWaitBufId(void);
extern bool HaveNFreeProcs(int n);
extern void ProcReleaseLocks(bool isCommit);
extern void ProcQueueInit(PROC_QUEUE *queue);
extern int ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable);
extern PGPROC *ProcWakeup(PGPROC *proc, int waitStatus);
extern void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock);
extern bool IsWaitingForLock(void);
extern void LockWaitCancel(void);
extern void ProcWaitForSignal(void);
extern void ProcSendSignal(int pid);
extern bool enable_sig_alarm(int delayms, bool is_statement_timeout);
extern bool disable_sig_alarm(bool is_statement_timeout);
extern void handle_sig_alarm(SIGNAL_ARGS);
extern bool enable_standby_sig_alarm(TimestampTz now,
TimestampTz fin_time, bool deadlock_only);
extern bool disable_standby_sig_alarm(void);
extern void handle_standby_sig_alarm(SIGNAL_ARGS);
#endif /* PROC_H */