Fix latch event policy that hid socket events.

If a WaitEventSetWait() caller asks for multiple events, an already set
latch would previously prevent other events from being reported at the
same time.  Now, we'll also poll the kernel for other events that would
fit in the caller's output buffer with a zero wait time.  This policy
change doesn't affect callers that ask for only one event.
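
To make the difference concrete, here is a minimal sketch of an affected
caller, written against the in-tree WaitEventSet API (client_sock and
HandleClientData() are hypothetical, and CreateWaitEventSet()'s first
parameter differs across releases):

    WaitEvent     events[2];
    WaitEventSet *set;
    int           n;

    set = CreateWaitEventSet(NULL, 2);
    AddWaitEventToSet(set, WL_LATCH_SET, PGINVALID_SOCKET, MyLatch, NULL);
    AddWaitEventToSet(set, WL_SOCKET_READABLE, client_sock, NULL, NULL);

    /*
     * Because nevents > 1 here, a set latch no longer hides a readable
     * socket: after reporting WL_LATCH_SET, the kernel is polled once with
     * zero timeout, so both events can come back from a single call.
     */
    n = WaitEventSetWait(set, -1 /* no timeout */, events, lengthof(events), 0);
    for (int i = 0; i < n; i++)
    {
        if (events[i].events & WL_LATCH_SET)
            ResetLatch(MyLatch);
        else if (events[i].events & WL_SOCKET_READABLE)
            HandleClientData(client_sock);  /* hypothetical handler */
    }

A caller passing nevents == 1 still gets the old fast-return behavior.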

The main caller affected is the postmaster.  If its latch is set
extremely frequently by backends launching workers and workers exiting,
we don't want it to handle only those jobs and ignore incoming client
connections.

Back-patch to 16 where the postmaster began using the API.  The
fast-return policy changed here is older than that, but doesn't cause
any known problems in earlier releases.

Reported-by: Nathan Bossart <nathandbossart@gmail.com>
Reviewed-by: Nathan Bossart <nathandbossart@gmail.com>
Discussion: https://postgr.es/m/Z1n5UpAiGDmFcMmd%40nathan
Thomas Munro 2025-01-20 15:17:47 +13:00
parent e6767c0ed1
commit 44f400fbc6

@@ -1458,9 +1458,9 @@ WaitEventSetWait(WaitEventSet *set, long timeout,
         int         rc;
 
         /*
-         * Check if the latch is set already. If so, leave the loop
-         * immediately, avoid blocking again. We don't attempt to report any
-         * other events that might also be satisfied.
+         * Check if the latch is set already first.  If so, we either exit
+         * immediately or ask the kernel for further events available right
+         * now without waiting, depending on how many events the caller wants.
          *
          * If someone sets the latch between this and the
          * WaitEventSetWaitBlock() below, the setter will write a byte to the
@@ -1505,7 +1505,16 @@ WaitEventSetWait(WaitEventSet *set, long timeout,
 
             /* could have been set above */
             set->latch->maybe_sleeping = false;
-            break;
+            if (returned_events == nevents)
+                break;          /* output buffer full already */
+
+            /*
+             * Even though we already have an event, we'll poll just once with
+             * zero timeout to see what non-latch events we can fit into the
+             * output buffer at the same time.
+             */
+            cur_timeout = 0;
+            timeout = 0;
         }
 
         /*
@@ -1514,18 +1523,16 @@ WaitEventSetWait(WaitEventSet *set, long timeout,
          * to retry, everything >= 1 is the number of returned events.
          */
         rc = WaitEventSetWaitBlock(set, cur_timeout,
-                                   occurred_events, nevents);
+                                   occurred_events, nevents - returned_events);
 
-        if (set->latch)
-        {
-            Assert(set->latch->maybe_sleeping);
+        if (set->latch &&
+            set->latch->maybe_sleeping)
             set->latch->maybe_sleeping = false;
-        }
 
         if (rc == -1)
             break;              /* timeout occurred */
         else
-            returned_events = rc;
+            returned_events += rc;
 
         /* If we're not done, update cur_timeout for next iteration */
         if (returned_events == 0 && timeout >= 0)
@@ -1613,7 +1620,7 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
             /* Drain the signalfd. */
             drain();
 
-            if (set->latch && set->latch->is_set)
+            if (set->latch && set->latch->maybe_sleeping && set->latch->is_set)
             {
                 occurred_events->fd = PGINVALID_SOCKET;
                 occurred_events->events = WL_LATCH_SET;
@@ -1772,7 +1779,7 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
         if (cur_event->events == WL_LATCH_SET &&
             cur_kqueue_event->filter == EVFILT_SIGNAL)
         {
-            if (set->latch && set->latch->is_set)
+            if (set->latch && set->latch->maybe_sleeping && set->latch->is_set)
             {
                 occurred_events->fd = PGINVALID_SOCKET;
                 occurred_events->events = WL_LATCH_SET;
@@ -1897,7 +1904,7 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
             /* There's data in the self-pipe, clear it. */
             drain();
 
-            if (set->latch && set->latch->is_set)
+            if (set->latch && set->latch->maybe_sleeping && set->latch->is_set)
             {
                 occurred_events->fd = PGINVALID_SOCKET;
                 occurred_events->events = WL_LATCH_SET;
@@ -2113,7 +2120,7 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
             if (!ResetEvent(set->handles[cur_event->pos + 1]))
                 elog(ERROR, "ResetEvent failed: error code %lu", GetLastError());
 
-            if (set->latch && set->latch->is_set)
+            if (set->latch && set->latch->maybe_sleeping && set->latch->is_set)
             {
                 occurred_events->fd = PGINVALID_SOCKET;
                 occurred_events->events = WL_LATCH_SET;
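
Taken together, the hunks above give the wait loop roughly the following
shape; this is a simplified paraphrase rather than the literal committed
code, with report_latch_event() standing in for the inlined latch-reporting
block:

    while (returned_events == 0)
    {
        if (set->latch && set->latch->is_set)
        {
            report_latch_event(occurred_events);    /* fills one WaitEvent */
            occurred_events++;
            returned_events++;
            set->latch->maybe_sleeping = false;

            if (returned_events == nevents)
                break;          /* output buffer full already */

            /* Poll once, without waiting, for other ready events. */
            cur_timeout = 0;
            timeout = 0;
        }

        /*
         * The kernel-specific implementations now test maybe_sleeping as
         * well as is_set, so the latch event reported above cannot be
         * reported a second time by this zero-timeout poll.
         */
        rc = WaitEventSetWaitBlock(set, cur_timeout,
                                   occurred_events, nevents - returned_events);

        if (set->latch &&
            set->latch->maybe_sleeping)
            set->latch->maybe_sleeping = false;

        if (rc == -1)
            break;              /* timeout occurred */
        returned_events += rc;
    }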