mirror of
				https://github.com/element-hq/synapse.git
				synced 2025-11-04 00:01:22 -05:00 
			
		
		
		
	Fix lost logcontext when using timeout_deferred(...) (#19090)
				
					
				
			Fix lost logcontext when using `timeout_deferred(...)` and things actually timeout. Fix https://github.com/element-hq/synapse/issues/19087 (our HTTP client times out requests using `timeout_deferred(...)` Fix https://github.com/element-hq/synapse/issues/19066 (`/sync` uses `notifier.wait_for_events()` which uses `timeout_deferred(...)` under the hood) ### When/why did these lost logcontext warnings start happening? ``` synapse.logging.context - 107 - WARNING - sentinel - Expected logging context call_later but found POST-2453 synapse.logging.context - 107 - WARNING - sentinel - Expected logging context call_later was lost ``` In https://github.com/element-hq/synapse/pull/18828, we switched `timeout_deferred(...)` from using `reactor.callLater(...)` to [`clock.call_later(...)`](3b59ac3b69/synapse/util/clock.py (L224-L313)) under the hood. This meant it started dealing with logcontexts but our `time_it_out()` callback didn't follow our [Synapse logcontext rules](3b59ac3b69/docs/log_contexts.md).
This commit is contained in:
		
							parent
							
								
									f0aae62f85
								
							
						
					
					
						commit
						c0b9437ab6
					
				
							
								
								
									
										1
									
								
								changelog.d/19090.bugfix
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								changelog.d/19090.bugfix
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1 @@
 | 
			
		||||
Fix lost logcontext warnings from timeouts in sync and requests made by Synapse itself.
 | 
			
		||||
@ -896,7 +896,7 @@ def run_in_background(
 | 
			
		||||
        # If the function messes with logcontexts, we can assume it follows the Synapse
 | 
			
		||||
        # logcontext rules (Rules for functions returning awaitables: "If the awaitable
 | 
			
		||||
        # is already complete, the function returns with the same logcontext it started
 | 
			
		||||
        # with."). If it function doesn't touch logcontexts at all, we can also assume
 | 
			
		||||
        # with."). If the function doesn't touch logcontexts at all, we can also assume
 | 
			
		||||
        # the logcontext is unchanged.
 | 
			
		||||
        #
 | 
			
		||||
        # Either way, the function should have maintained the calling logcontext, so we
 | 
			
		||||
@ -905,11 +905,21 @@ def run_in_background(
 | 
			
		||||
        # to reset the logcontext to the sentinel logcontext as that would run
 | 
			
		||||
        # immediately (remember our goal is to maintain the calling logcontext when we
 | 
			
		||||
        # return).
 | 
			
		||||
        logcontext_debug_logger.debug(
 | 
			
		||||
            "run_in_background(%s): deferred already completed and the function should have maintained the logcontext %s",
 | 
			
		||||
            instance_id,
 | 
			
		||||
            calling_context,
 | 
			
		||||
        )
 | 
			
		||||
        if current_context() != calling_context:
 | 
			
		||||
            logcontext_error(
 | 
			
		||||
                "run_in_background(%s): deferred already completed but the function did not maintain the calling logcontext %s (found %s)"
 | 
			
		||||
                % (
 | 
			
		||||
                    instance_id,
 | 
			
		||||
                    calling_context,
 | 
			
		||||
                    current_context(),
 | 
			
		||||
                )
 | 
			
		||||
            )
 | 
			
		||||
        else:
 | 
			
		||||
            logcontext_debug_logger.debug(
 | 
			
		||||
                "run_in_background(%s): deferred already completed (maintained the calling logcontext %s)",
 | 
			
		||||
                instance_id,
 | 
			
		||||
                calling_context,
 | 
			
		||||
            )
 | 
			
		||||
        return d
 | 
			
		||||
 | 
			
		||||
    # Since the function we called may follow the Synapse logcontext rules (Rules for
 | 
			
		||||
 | 
			
		||||
@ -808,7 +808,8 @@ def timeout_deferred(
 | 
			
		||||
        timed_out[0] = True
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            deferred.cancel()
 | 
			
		||||
            with PreserveLoggingContext():
 | 
			
		||||
                deferred.cancel()
 | 
			
		||||
        except Exception:  # if we throw any exception it'll break time outs
 | 
			
		||||
            logger.exception("Canceller failed during timeout")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -266,7 +266,7 @@ class Clock:
 | 
			
		||||
                # We use `PreserveLoggingContext` to prevent our new `call_later`
 | 
			
		||||
                # logcontext from finishing as soon as we exit this function, in case `f`
 | 
			
		||||
                # returns an awaitable/deferred which would continue running and may try to
 | 
			
		||||
                # restore the `loop_call` context when it's done (because it's trying to
 | 
			
		||||
                # restore the `call_later` context when it's done (because it's trying to
 | 
			
		||||
                # adhere to the Synapse logcontext rules.)
 | 
			
		||||
                #
 | 
			
		||||
                # This also ensures that we return to the `sentinel` context when we exit
 | 
			
		||||
 | 
			
		||||
@ -341,6 +341,9 @@ def logcontext_clean(target: TV) -> TV:
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    def logcontext_error(msg: str) -> NoReturn:
 | 
			
		||||
        # Log so we can still see it in the logs like normal
 | 
			
		||||
        logger.warning(msg)
 | 
			
		||||
        # But also fail the test
 | 
			
		||||
        raise AssertionError("logcontext error: %s" % (msg))
 | 
			
		||||
 | 
			
		||||
    patcher = patch("synapse.logging.context.logcontext_error", new=logcontext_error)
 | 
			
		||||
 | 
			
		||||
@ -45,7 +45,7 @@ from synapse.util.async_helpers import (
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
from tests.server import get_clock
 | 
			
		||||
from tests.unittest import TestCase
 | 
			
		||||
from tests.unittest import TestCase, logcontext_clean
 | 
			
		||||
 | 
			
		||||
logger = logging.getLogger(__name__)
 | 
			
		||||
 | 
			
		||||
@ -198,7 +198,12 @@ class TimeoutDeferredTest(TestCase):
 | 
			
		||||
 | 
			
		||||
        self.failureResultOf(timing_out_d, defer.TimeoutError)
 | 
			
		||||
 | 
			
		||||
    async def test_logcontext_is_preserved_on_cancellation(self) -> None:
 | 
			
		||||
    @logcontext_clean
 | 
			
		||||
    async def test_logcontext_is_preserved_on_timeout_cancellation(self) -> None:
 | 
			
		||||
        """
 | 
			
		||||
        Test that the logcontext is preserved when we timeout and the deferred is
 | 
			
		||||
        cancelled.
 | 
			
		||||
        """
 | 
			
		||||
        # Sanity check that we start in the sentinel context
 | 
			
		||||
        self.assertEqual(current_context(), SENTINEL_CONTEXT)
 | 
			
		||||
 | 
			
		||||
@ -270,6 +275,65 @@ class TimeoutDeferredTest(TestCase):
 | 
			
		||||
        # Back to the sentinel context
 | 
			
		||||
        self.assertEqual(current_context(), SENTINEL_CONTEXT)
 | 
			
		||||
 | 
			
		||||
    @logcontext_clean
 | 
			
		||||
    async def test_logcontext_is_not_lost_when_awaiting_on_timeout_cancellation(
 | 
			
		||||
        self,
 | 
			
		||||
    ) -> None:
 | 
			
		||||
        """
 | 
			
		||||
        Test that the logcontext isn't lost when we `await make_deferred_yieldable(...)`
 | 
			
		||||
        the deferred to complete/timeout and it times out.
 | 
			
		||||
        """
 | 
			
		||||
 | 
			
		||||
        # Sanity check that we start in the sentinel context
 | 
			
		||||
        self.assertEqual(current_context(), SENTINEL_CONTEXT)
 | 
			
		||||
 | 
			
		||||
        # Create a deferred which we will never complete
 | 
			
		||||
        incomplete_d: Deferred = Deferred()
 | 
			
		||||
 | 
			
		||||
        async def competing_task() -> None:
 | 
			
		||||
            with LoggingContext(
 | 
			
		||||
                name="competing", server_name="test_server"
 | 
			
		||||
            ) as context_competing:
 | 
			
		||||
                timing_out_d = timeout_deferred(
 | 
			
		||||
                    deferred=incomplete_d,
 | 
			
		||||
                    timeout=1.0,
 | 
			
		||||
                    clock=self.clock,
 | 
			
		||||
                )
 | 
			
		||||
                self.assertNoResult(timing_out_d)
 | 
			
		||||
                # We should still be in the logcontext we started in
 | 
			
		||||
                self.assertIs(current_context(), context_competing)
 | 
			
		||||
 | 
			
		||||
                # Mimic the normal use case to wait for the work to complete or timeout.
 | 
			
		||||
                #
 | 
			
		||||
                # In this specific test, we expect the deferred to timeout and raise an
 | 
			
		||||
                # exception at this point.
 | 
			
		||||
                await make_deferred_yieldable(timing_out_d)
 | 
			
		||||
 | 
			
		||||
                self.fail(
 | 
			
		||||
                    "We should not make it to this point as the `timing_out_d` should have been cancelled"
 | 
			
		||||
                )
 | 
			
		||||
 | 
			
		||||
        d = defer.ensureDeferred(competing_task())
 | 
			
		||||
 | 
			
		||||
        # Still in the sentinel context
 | 
			
		||||
        self.assertEqual(current_context(), SENTINEL_CONTEXT)
 | 
			
		||||
 | 
			
		||||
        # Pump until we trigger the timeout
 | 
			
		||||
        self.reactor.pump(
 | 
			
		||||
            # We only need to pump `1.0` (seconds) as we set
 | 
			
		||||
            # `timeout_deferred(timeout=1.0)` above
 | 
			
		||||
            (1.0,)
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        # Still in the sentinel context
 | 
			
		||||
        self.assertEqual(current_context(), SENTINEL_CONTEXT)
 | 
			
		||||
 | 
			
		||||
        # We expect a failure due to the timeout
 | 
			
		||||
        self.failureResultOf(d, defer.TimeoutError)
 | 
			
		||||
 | 
			
		||||
        # Back to the sentinel context at the end of the day
 | 
			
		||||
        self.assertEqual(current_context(), SENTINEL_CONTEXT)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class _TestException(Exception):  #
 | 
			
		||||
    pass
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user