mirror of
https://github.com/element-hq/synapse.git
synced 2025-11-09 00:02:41 -05:00
Fix lost logcontext when using timeout_deferred(...) (#19090)
Fix lost logcontext when using `timeout_deferred(...)` and things actually timeout. Fix https://github.com/element-hq/synapse/issues/19087 (our HTTP client times out requests using `timeout_deferred(...)` Fix https://github.com/element-hq/synapse/issues/19066 (`/sync` uses `notifier.wait_for_events()` which uses `timeout_deferred(...)` under the hood) ### When/why did these lost logcontext warnings start happening? ``` synapse.logging.context - 107 - WARNING - sentinel - Expected logging context call_later but found POST-2453 synapse.logging.context - 107 - WARNING - sentinel - Expected logging context call_later was lost ``` In https://github.com/element-hq/synapse/pull/18828, we switched `timeout_deferred(...)` from using `reactor.callLater(...)` to [`clock.call_later(...)`](3b59ac3b69/synapse/util/clock.py (L224-L313)) under the hood. This meant it started dealing with logcontexts but our `time_it_out()` callback didn't follow our [Synapse logcontext rules](3b59ac3b69/docs/log_contexts.md).
This commit is contained in:
parent
f0aae62f85
commit
c0b9437ab6
1
changelog.d/19090.bugfix
Normal file
1
changelog.d/19090.bugfix
Normal file
@ -0,0 +1 @@
|
||||
Fix lost logcontext warnings from timeouts in sync and requests made by Synapse itself.
|
||||
@ -896,7 +896,7 @@ def run_in_background(
|
||||
# If the function messes with logcontexts, we can assume it follows the Synapse
|
||||
# logcontext rules (Rules for functions returning awaitables: "If the awaitable
|
||||
# is already complete, the function returns with the same logcontext it started
|
||||
# with."). If it function doesn't touch logcontexts at all, we can also assume
|
||||
# with."). If the function doesn't touch logcontexts at all, we can also assume
|
||||
# the logcontext is unchanged.
|
||||
#
|
||||
# Either way, the function should have maintained the calling logcontext, so we
|
||||
@ -905,8 +905,18 @@ def run_in_background(
|
||||
# to reset the logcontext to the sentinel logcontext as that would run
|
||||
# immediately (remember our goal is to maintain the calling logcontext when we
|
||||
# return).
|
||||
if current_context() != calling_context:
|
||||
logcontext_error(
|
||||
"run_in_background(%s): deferred already completed but the function did not maintain the calling logcontext %s (found %s)"
|
||||
% (
|
||||
instance_id,
|
||||
calling_context,
|
||||
current_context(),
|
||||
)
|
||||
)
|
||||
else:
|
||||
logcontext_debug_logger.debug(
|
||||
"run_in_background(%s): deferred already completed and the function should have maintained the logcontext %s",
|
||||
"run_in_background(%s): deferred already completed (maintained the calling logcontext %s)",
|
||||
instance_id,
|
||||
calling_context,
|
||||
)
|
||||
|
||||
@ -808,6 +808,7 @@ def timeout_deferred(
|
||||
timed_out[0] = True
|
||||
|
||||
try:
|
||||
with PreserveLoggingContext():
|
||||
deferred.cancel()
|
||||
except Exception: # if we throw any exception it'll break time outs
|
||||
logger.exception("Canceller failed during timeout")
|
||||
|
||||
@ -266,7 +266,7 @@ class Clock:
|
||||
# We use `PreserveLoggingContext` to prevent our new `call_later`
|
||||
# logcontext from finishing as soon as we exit this function, in case `f`
|
||||
# returns an awaitable/deferred which would continue running and may try to
|
||||
# restore the `loop_call` context when it's done (because it's trying to
|
||||
# restore the `call_later` context when it's done (because it's trying to
|
||||
# adhere to the Synapse logcontext rules.)
|
||||
#
|
||||
# This also ensures that we return to the `sentinel` context when we exit
|
||||
|
||||
@ -341,6 +341,9 @@ def logcontext_clean(target: TV) -> TV:
|
||||
"""
|
||||
|
||||
def logcontext_error(msg: str) -> NoReturn:
|
||||
# Log so we can still see it in the logs like normal
|
||||
logger.warning(msg)
|
||||
# But also fail the test
|
||||
raise AssertionError("logcontext error: %s" % (msg))
|
||||
|
||||
patcher = patch("synapse.logging.context.logcontext_error", new=logcontext_error)
|
||||
|
||||
@ -45,7 +45,7 @@ from synapse.util.async_helpers import (
|
||||
)
|
||||
|
||||
from tests.server import get_clock
|
||||
from tests.unittest import TestCase
|
||||
from tests.unittest import TestCase, logcontext_clean
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@ -198,7 +198,12 @@ class TimeoutDeferredTest(TestCase):
|
||||
|
||||
self.failureResultOf(timing_out_d, defer.TimeoutError)
|
||||
|
||||
async def test_logcontext_is_preserved_on_cancellation(self) -> None:
|
||||
@logcontext_clean
|
||||
async def test_logcontext_is_preserved_on_timeout_cancellation(self) -> None:
|
||||
"""
|
||||
Test that the logcontext is preserved when we timeout and the deferred is
|
||||
cancelled.
|
||||
"""
|
||||
# Sanity check that we start in the sentinel context
|
||||
self.assertEqual(current_context(), SENTINEL_CONTEXT)
|
||||
|
||||
@ -270,6 +275,65 @@ class TimeoutDeferredTest(TestCase):
|
||||
# Back to the sentinel context
|
||||
self.assertEqual(current_context(), SENTINEL_CONTEXT)
|
||||
|
||||
@logcontext_clean
|
||||
async def test_logcontext_is_not_lost_when_awaiting_on_timeout_cancellation(
|
||||
self,
|
||||
) -> None:
|
||||
"""
|
||||
Test that the logcontext isn't lost when we `await make_deferred_yieldable(...)`
|
||||
the deferred to complete/timeout and it times out.
|
||||
"""
|
||||
|
||||
# Sanity check that we start in the sentinel context
|
||||
self.assertEqual(current_context(), SENTINEL_CONTEXT)
|
||||
|
||||
# Create a deferred which we will never complete
|
||||
incomplete_d: Deferred = Deferred()
|
||||
|
||||
async def competing_task() -> None:
|
||||
with LoggingContext(
|
||||
name="competing", server_name="test_server"
|
||||
) as context_competing:
|
||||
timing_out_d = timeout_deferred(
|
||||
deferred=incomplete_d,
|
||||
timeout=1.0,
|
||||
clock=self.clock,
|
||||
)
|
||||
self.assertNoResult(timing_out_d)
|
||||
# We should still be in the logcontext we started in
|
||||
self.assertIs(current_context(), context_competing)
|
||||
|
||||
# Mimic the normal use case to wait for the work to complete or timeout.
|
||||
#
|
||||
# In this specific test, we expect the deferred to timeout and raise an
|
||||
# exception at this point.
|
||||
await make_deferred_yieldable(timing_out_d)
|
||||
|
||||
self.fail(
|
||||
"We should not make it to this point as the `timing_out_d` should have been cancelled"
|
||||
)
|
||||
|
||||
d = defer.ensureDeferred(competing_task())
|
||||
|
||||
# Still in the sentinel context
|
||||
self.assertEqual(current_context(), SENTINEL_CONTEXT)
|
||||
|
||||
# Pump until we trigger the timeout
|
||||
self.reactor.pump(
|
||||
# We only need to pump `1.0` (seconds) as we set
|
||||
# `timeout_deferred(timeout=1.0)` above
|
||||
(1.0,)
|
||||
)
|
||||
|
||||
# Still in the sentinel context
|
||||
self.assertEqual(current_context(), SENTINEL_CONTEXT)
|
||||
|
||||
# We expect a failure due to the timeout
|
||||
self.failureResultOf(d, defer.TimeoutError)
|
||||
|
||||
# Back to the sentinel context at the end of the day
|
||||
self.assertEqual(current_context(), SENTINEL_CONTEXT)
|
||||
|
||||
|
||||
class _TestException(Exception): #
|
||||
pass
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user