Compare commits

...

3 Commits

Author SHA1 Message Date
Andres Freund
f374fb4aab lwlock: Fix quadratic behavior with very long wait lists
Until now LWLockDequeueSelf() sequentially searched the list of waiters to see
if the current proc is still is on the list of waiters, or has already been
removed. In extreme workloads, where the wait lists are very long, this leads
to a quadratic behavior. #backends iterating over a list #backends
long. Additionally, the likelihood of needing to call LWLockDequeueSelf() in
the first place also increases with the increased length of the wait queue, as
it becomes more likely that a lock is released while waiting for the wait list
lock, which is held for longer during lock release.

Due to the exponential back-off in perform_spin_delay() this is surprisingly
hard to detect. We should make that easier, e.g. by adding a wait event around
the pg_usleep() - but that's a separate patch.

The fix is simple - track whether a proc is currently waiting in the wait list
or already removed but waiting to be woken up in PGPROC->lwWaiting.

In some workloads with a lot of clients contending for a small number of
lwlocks (e.g. WALWriteLock), the fix can substantially increase throughput.

This has been originally fixed for 16~ with a4adc31f6902 without a
backpatch, and we have heard complaints from users impacted by this
quadratic behavior in older versions as well.

Author: Andres Freund <andres@anarazel.de>
Reviewed-by: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>
Discussion: https://postgr.es/m/20221027165914.2hofzp4cvutj6gin@awork3.anarazel.de
Discussion: https://postgr.es/m/CALj2ACXktNbG=K8Xi7PSqbofTZozavhaxjatVc14iYaLu4Maag@mail.gmail.com
Backpatch-through: 12
2024-01-18 11:12:31 +09:00
Heikki Linnakangas
90ccc9bf90 Fix incorrect comment on how BackendStatusArray is indexed
The comment was copy-pasted from the call to ProcSignalInit() in
AuxiliaryProcessMain(), which uses a similar scheme of having reserved
slots for aux processes after MaxBackends slots for backends. However,
ProcSignalInit() indexing starts from 1, whereas BackendStatusArray
starts from 0. The code is correct, but the comment was wrong.

Discussion: https://www.postgresql.org/message-id/f3ecd4cb-85ee-4e54-8278-5fabfb3a4ed0@iki.fi
Backpatch-through: v14
2024-01-17 15:45:57 +02:00
Daniel Gustafsson
551c0b7bda Close socket in case of errors in setting non-blocking
If configuring the newly created socket non-blocking fails we
error out and return INVALID_SOCKET, but the socket that had
been created wasn't closed. Fix by issuing closesocket in the
errorpath.

Backpatch to all supported branches.

Author: Ranier Vilela <ranier.vf@gmail.com>
Discussion: https://postgr.es/m/CAEudQApmU5CrKefH85VbNYE2y8H=-qqEJbg6RAPU65+vCe+89A@mail.gmail.com
Backpatch-through: v12
2024-01-17 11:24:11 +01:00
7 changed files with 46 additions and 30 deletions

View File

@ -486,7 +486,7 @@ MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, const char *gid,
proc->roleId = owner; proc->roleId = owner;
proc->tempNamespaceId = InvalidOid; proc->tempNamespaceId = InvalidOid;
proc->isBackgroundWorker = false; proc->isBackgroundWorker = false;
proc->lwWaiting = false; proc->lwWaiting = LW_WS_NOT_WAITING;
proc->lwWaitMode = 0; proc->lwWaitMode = 0;
proc->waitLock = NULL; proc->waitLock = NULL;
proc->waitProcLock = NULL; proc->waitProcLock = NULL;

View File

@ -303,6 +303,7 @@ pgwin32_socket(int af, int type, int protocol)
if (ioctlsocket(s, FIONBIO, &on)) if (ioctlsocket(s, FIONBIO, &on))
{ {
TranslateSocketError(); TranslateSocketError();
closesocket(s);
return INVALID_SOCKET; return INVALID_SOCKET;
} }
errno = 0; errno = 0;

View File

@ -987,6 +987,15 @@ LWLockWakeup(LWLock *lock)
wokeup_somebody = true; wokeup_somebody = true;
} }
/*
* Signal that the process isn't on the wait list anymore. This allows
* LWLockDequeueSelf() to remove itself of the waitlist with a
* proclist_delete(), rather than having to check if it has been
* removed from the list.
*/
Assert(waiter->lwWaiting == LW_WS_WAITING);
waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
/* /*
* Once we've woken up an exclusive lock, there's no point in waking * Once we've woken up an exclusive lock, there's no point in waking
* up anybody else. * up anybody else.
@ -1044,7 +1053,7 @@ LWLockWakeup(LWLock *lock)
* another lock. * another lock.
*/ */
pg_write_barrier(); pg_write_barrier();
waiter->lwWaiting = false; waiter->lwWaiting = LW_WS_NOT_WAITING;
PGSemaphoreUnlock(waiter->sem); PGSemaphoreUnlock(waiter->sem);
} }
} }
@ -1065,7 +1074,7 @@ LWLockQueueSelf(LWLock *lock, LWLockMode mode)
if (MyProc == NULL) if (MyProc == NULL)
elog(PANIC, "cannot wait without a PGPROC structure"); elog(PANIC, "cannot wait without a PGPROC structure");
if (MyProc->lwWaiting) if (MyProc->lwWaiting != LW_WS_NOT_WAITING)
elog(PANIC, "queueing for lock while waiting on another one"); elog(PANIC, "queueing for lock while waiting on another one");
LWLockWaitListLock(lock); LWLockWaitListLock(lock);
@ -1073,7 +1082,7 @@ LWLockQueueSelf(LWLock *lock, LWLockMode mode)
/* setting the flag is protected by the spinlock */ /* setting the flag is protected by the spinlock */
pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_HAS_WAITERS); pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_HAS_WAITERS);
MyProc->lwWaiting = true; MyProc->lwWaiting = LW_WS_WAITING;
MyProc->lwWaitMode = mode; MyProc->lwWaitMode = mode;
/* LW_WAIT_UNTIL_FREE waiters are always at the front of the queue */ /* LW_WAIT_UNTIL_FREE waiters are always at the front of the queue */
@ -1100,8 +1109,7 @@ LWLockQueueSelf(LWLock *lock, LWLockMode mode)
static void static void
LWLockDequeueSelf(LWLock *lock) LWLockDequeueSelf(LWLock *lock)
{ {
bool found = false; bool on_waitlist;
proclist_mutable_iter iter;
#ifdef LWLOCK_STATS #ifdef LWLOCK_STATS
lwlock_stats *lwstats; lwlock_stats *lwstats;
@ -1114,18 +1122,13 @@ LWLockDequeueSelf(LWLock *lock)
LWLockWaitListLock(lock); LWLockWaitListLock(lock);
/* /*
* Can't just remove ourselves from the list, but we need to iterate over * Remove ourselves from the waitlist, unless we've already been
* all entries as somebody else could have dequeued us. * removed. The removal happens with the wait list lock held, so there's
* no race in this check.
*/ */
proclist_foreach_modify(iter, &lock->waiters, lwWaitLink) on_waitlist = MyProc->lwWaiting == LW_WS_WAITING;
{ if (on_waitlist)
if (iter.cur == MyProc->pgprocno) proclist_delete(&lock->waiters, MyProc->pgprocno, lwWaitLink);
{
found = true;
proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
break;
}
}
if (proclist_is_empty(&lock->waiters) && if (proclist_is_empty(&lock->waiters) &&
(pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS) != 0) (pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS) != 0)
@ -1137,8 +1140,8 @@ LWLockDequeueSelf(LWLock *lock)
LWLockWaitListUnlock(lock); LWLockWaitListUnlock(lock);
/* clear waiting state again, nice for debugging */ /* clear waiting state again, nice for debugging */
if (found) if (on_waitlist)
MyProc->lwWaiting = false; MyProc->lwWaiting = LW_WS_NOT_WAITING;
else else
{ {
int extraWaits = 0; int extraWaits = 0;
@ -1162,7 +1165,7 @@ LWLockDequeueSelf(LWLock *lock)
for (;;) for (;;)
{ {
PGSemaphoreLock(MyProc->sem); PGSemaphoreLock(MyProc->sem);
if (!MyProc->lwWaiting) if (MyProc->lwWaiting == LW_WS_NOT_WAITING)
break; break;
extraWaits++; extraWaits++;
} }
@ -1313,7 +1316,7 @@ LWLockAcquire(LWLock *lock, LWLockMode mode)
for (;;) for (;;)
{ {
PGSemaphoreLock(proc->sem); PGSemaphoreLock(proc->sem);
if (!proc->lwWaiting) if (proc->lwWaiting == LW_WS_NOT_WAITING)
break; break;
extraWaits++; extraWaits++;
} }
@ -1478,7 +1481,7 @@ LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
for (;;) for (;;)
{ {
PGSemaphoreLock(proc->sem); PGSemaphoreLock(proc->sem);
if (!proc->lwWaiting) if (proc->lwWaiting == LW_WS_NOT_WAITING)
break; break;
extraWaits++; extraWaits++;
} }
@ -1694,7 +1697,7 @@ LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval)
for (;;) for (;;)
{ {
PGSemaphoreLock(proc->sem); PGSemaphoreLock(proc->sem);
if (!proc->lwWaiting) if (proc->lwWaiting == LW_WS_NOT_WAITING)
break; break;
extraWaits++; extraWaits++;
} }
@ -1772,6 +1775,10 @@ LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 val)
proclist_delete(&lock->waiters, iter.cur, lwWaitLink); proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
proclist_push_tail(&wakeup, iter.cur, lwWaitLink); proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
/* see LWLockWakeup() */
Assert(waiter->lwWaiting == LW_WS_WAITING);
waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
} }
/* We are done updating shared state of the lock itself. */ /* We are done updating shared state of the lock itself. */
@ -1787,7 +1794,7 @@ LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 val)
proclist_delete(&wakeup, iter.cur, lwWaitLink); proclist_delete(&wakeup, iter.cur, lwWaitLink);
/* check comment in LWLockWakeup() about this barrier */ /* check comment in LWLockWakeup() about this barrier */
pg_write_barrier(); pg_write_barrier();
waiter->lwWaiting = false; waiter->lwWaiting = LW_WS_NOT_WAITING;
PGSemaphoreUnlock(waiter->sem); PGSemaphoreUnlock(waiter->sem);
} }
} }

View File

@ -397,7 +397,7 @@ InitProcess(void)
/* NB -- autovac launcher intentionally does not set IS_AUTOVACUUM */ /* NB -- autovac launcher intentionally does not set IS_AUTOVACUUM */
if (IsAutoVacuumWorkerProcess()) if (IsAutoVacuumWorkerProcess())
MyProc->statusFlags |= PROC_IS_AUTOVACUUM; MyProc->statusFlags |= PROC_IS_AUTOVACUUM;
MyProc->lwWaiting = false; MyProc->lwWaiting = LW_WS_NOT_WAITING;
MyProc->lwWaitMode = 0; MyProc->lwWaitMode = 0;
MyProc->waitLock = NULL; MyProc->waitLock = NULL;
MyProc->waitProcLock = NULL; MyProc->waitProcLock = NULL;
@ -579,7 +579,7 @@ InitAuxiliaryProcess(void)
MyProc->isBackgroundWorker = IsBackgroundWorker; MyProc->isBackgroundWorker = IsBackgroundWorker;
MyProc->delayChkptFlags = 0; MyProc->delayChkptFlags = 0;
MyProc->statusFlags = 0; MyProc->statusFlags = 0;
MyProc->lwWaiting = false; MyProc->lwWaiting = LW_WS_NOT_WAITING;
MyProc->lwWaitMode = 0; MyProc->lwWaitMode = 0;
MyProc->waitLock = NULL; MyProc->waitLock = NULL;
MyProc->waitProcLock = NULL; MyProc->waitProcLock = NULL;

View File

@ -263,9 +263,9 @@ pgstat_beinit(void)
* Assign the MyBEEntry for an auxiliary process. Since it doesn't * Assign the MyBEEntry for an auxiliary process. Since it doesn't
* have a BackendId, the slot is statically allocated based on the * have a BackendId, the slot is statically allocated based on the
* auxiliary process type (MyAuxProcType). Backends use slots indexed * auxiliary process type (MyAuxProcType). Backends use slots indexed
* in the range from 1 to MaxBackends (inclusive), so we use * in the range from 0 to MaxBackends (exclusive), so we use
* MaxBackends + AuxBackendType + 1 as the index of the slot for an * MaxBackends + AuxProcType as the index of the slot for an auxiliary
* auxiliary process. * process.
*/ */
MyBEEntry = &BackendStatusArray[MaxBackends + MyAuxProcType]; MyBEEntry = &BackendStatusArray[MaxBackends + MyAuxProcType];
} }

View File

@ -23,6 +23,14 @@
struct PGPROC; struct PGPROC;
/* what state of the wait process is a backend in */
typedef enum LWLockWaitState
{
LW_WS_NOT_WAITING, /* not currently waiting / woken up */
LW_WS_WAITING, /* currently waiting */
LW_WS_PENDING_WAKEUP /* removed from waitlist, but not yet signalled */
} LWLockWaitState;
/* /*
* Code outside of lwlock.c should not manipulate the contents of this * Code outside of lwlock.c should not manipulate the contents of this
* structure directly, but we have to declare it here to allow LWLocks to be * structure directly, but we have to declare it here to allow LWLocks to be

View File

@ -211,7 +211,7 @@ struct PGPROC
bool recoveryConflictPending; bool recoveryConflictPending;
/* Info about LWLock the process is currently waiting for, if any. */ /* Info about LWLock the process is currently waiting for, if any. */
bool lwWaiting; /* true if waiting for an LW lock */ uint8 lwWaiting; /* see LWLockWaitState */
uint8 lwWaitMode; /* lwlock mode being waited for */ uint8 lwWaitMode; /* lwlock mode being waited for */
proclist_node lwWaitLink; /* position in LW lock wait list */ proclist_node lwWaitLink; /* position in LW lock wait list */