fix(profiling): track running asyncio loop if it exists (#15120)

KowalskiThomas · web-flow · commit fd9e58b1ee51 · 2025-11-07T14:31:05.000+01:00
## Description https://datadoghq.atlassian.net/browse/PROF-12842 This PR updates the wrapping and thread-registering logic for the Profiler in order to track the running loop when it exists. This is needed because otherwise, importing/starting the Profiler after starting a Task (or a loop more generally) will make us blind to the existing running loop. Currently, we `wrap` the `asyncio.set_event_loop` function to capture when the Event Loop is first set (or is swapped). However, if the `_asyncio` module that sets up wrapping is imported/executed _after_ the loop has been set, we will miss that first call to `set_event_loop` and be blind to `asyncio` Tasks until the Event Loop is changed (which in many cases never happens). Note that we also need to execute the "find loop and track it" logic whenever we start the Profiler, because in that case we may have tried (earlier) to call `track_asyncio_loop`, but that call would have failed as no thread was registered in the Profiler yet. I added four tests that account for various edge cases. Unfortunately, two of them currently fail (I marked them as `xfail`) and there is no way to correctly fix them. The issue is that we can only get _the current running loop_ and not _the current (non-running) event loop_. In other words, if an event loop is created and set in `asyncio`, and the Profiler is started immediately afterwards without a Task having first been started, we will not be able to see that loop from the initialisation code and we will thus not be able to observe it from the Profiler thread. 
In short, what works are the most common cases: * ✅ Import Profiler, start Profiler, import asyncio, start Tasks * ✅ Import asyncio, Import Profiler, start Profiler, start Tasks * ✅ Import asyncio, Import Profiler, start Tasks (from within the Tasks) * 🚫 Import asyncio, Import Profiler, create (non-running) event loop, start Profiler, start Task * 🚫 Import asyncio, Import Profiler, create (non-running) event loop, create Task, start Profiler It is OK to start with that as I really consider the latter two to be edge cases. **Example: today we miss all `asyncio` data with the following code** ```py # 0. Profiler is NOT imported here, no watching is set up import os import asyncio async def my_coroutine(n): await asyncio.sleep(n) # 0. Function is defined, not run, Profiler is still not imported async def main(): # 3. We get here, import the Profiler module (and _asyncio as well) # We also start watching for set_event_loop calls – we don't see the existing loop from ddtrace.profiling import Profiler prof = Profiler() prof.start() # Should be as early as possible, e.g. before other imports, to ensure everything is profiled EXECUTION_TIME_SEC = int(os.environ.get("EXECUTION_TIME_SEC", "2")) t = asyncio.create_task(my_coroutine(EXECUTION_TIME_SEC / 2)) await asyncio.gather(t, my_coroutine(EXECUTION_TIME_SEC)) # 4. Interestingly, we detect a set_event_loop call here, but it's # being set to None before exiting # 1. This is executed first if __name__ == "__main__": # 2. This implicitly creates and sets the Event Loop asyncio.run(main()) ``` ## Testing I have tested this in `prof-correctness` (initially just replicated that it _did not_ work) and it now works as expected. I will be adding more correctness tests, one with a "top of file" import and Profiler start, one with a "top of file import" and "in-code Profiler start", and one with both an "in-code file import" and "in-code Profiler start". 
I also added four new tests to make sure we catch different edge cases with the order of imports and the order of Task/Profiler starts. Currently, two of them are marked as `xfail` because there is no way to reliably make them pass.
diff --git a/ddtrace/internal/datadog/profiling/stack_v2/__init__.pyi b/ddtrace/internal/datadog/profiling/stack_v2/__init__.pyi
@@ -4,7 +4,7 @@ from typing import Optional, Sequence, Union
 
 def register_thread(id: int, native_id: int, name: str) -> None: ...  # noqa: A002
 def unregister_thread(name: str) -> None: ...
-def track_asyncio_loop(thread_id: int, loop: asyncio.AbstractEventLoop) -> None: ...
+def track_asyncio_loop(thread_id: int, loop: Optional[asyncio.AbstractEventLoop]) -> None: ...
 def link_tasks(parent: asyncio.AbstractEventLoop, child: asyncio.Task) -> None: ...
 def init_asyncio(
     current_tasks: Sequence[asyncio.Task],
diff --git a/ddtrace/profiling/_asyncio.py b/ddtrace/profiling/_asyncio.py
@@ -20,6 +20,8 @@
 
 THREAD_LINK = None  # type: typing.Optional[_threading._ThreadLink]
 
+ASYNCIO_IMPORTED = False
+
 
 def current_task(loop: typing.Union["asyncio.AbstractEventLoop", None] = None) -> typing.Union["asyncio.Task", None]:
     return None
@@ -35,10 +37,51 @@ def _task_get_name(task: "asyncio.Task") -> str:
     return "Task-%d" % id(task)
 
 
+def _call_init_asyncio(asyncio: ModuleType) -> None:
+    from asyncio import tasks as asyncio_tasks
+
+    if sys.hexversion >= 0x030C0000:
+        scheduled_tasks = asyncio_tasks._scheduled_tasks.data  # type: ignore[attr-defined]
+        eager_tasks = asyncio_tasks._eager_tasks  # type: ignore[attr-defined]
+    else:
+        scheduled_tasks = asyncio_tasks._all_tasks.data  # type: ignore[attr-defined]
+        eager_tasks = None
+
+    stack_v2.init_asyncio(asyncio_tasks._current_tasks, scheduled_tasks, eager_tasks)  # type: ignore[attr-defined]
+
+
+def link_existing_loop_to_current_thread() -> None:
+    global ASYNCIO_IMPORTED
+
+    # Only proceed if asyncio is actually imported and available
+    # Don't rely solely on ASYNCIO_IMPORTED global since it persists across forks
+    if not ASYNCIO_IMPORTED or "asyncio" not in sys.modules:
+        return
+
+    import asyncio
+
+    # Only track if there's actually a running loop
+    running_loop: typing.Union["asyncio.AbstractEventLoop", None] = None
+    try:
+        running_loop = asyncio.get_running_loop()
+    except RuntimeError:
+        # No existing loop to track, nothing to do
+        return
+
+    # We have a running loop, track it
+    assert THREAD_LINK is not None  # nosec: assert is used for typing
+    THREAD_LINK.clear_threads(set(sys._current_frames().keys()))
+    THREAD_LINK.link_object(running_loop)
+    stack_v2.track_asyncio_loop(typing.cast(int, ddtrace_threading.current_thread().ident), running_loop)
+    _call_init_asyncio(asyncio)
+
+
 @ModuleWatchdog.after_module_imported("asyncio")
-def _(asyncio):
-    # type: (ModuleType) -> None
+def _(asyncio: ModuleType) -> None:
     global THREAD_LINK
+    global ASYNCIO_IMPORTED
+
+    ASYNCIO_IMPORTED = True
 
     if hasattr(asyncio, "current_task"):
         globals()["current_task"] = asyncio.current_task
@@ -57,7 +100,7 @@ def _(asyncio):
     if THREAD_LINK is None:
         THREAD_LINK = _threading._ThreadLink()
 
-    init_stack_v2 = config.stack.v2_enabled and stack_v2.is_available
+    init_stack_v2: bool = config.stack.v2_enabled and stack_v2.is_available
 
     @partial(wrap, sys.modules["asyncio.events"].BaseDefaultEventLoopPolicy.set_event_loop)
     def _(f, args, kwargs):
@@ -91,14 +134,7 @@ def _(f, args, kwargs):
                 for child in children:
                     stack_v2.link_tasks(parent, child)
 
-        if sys.hexversion >= 0x030C0000:
-            scheduled_tasks = asyncio.tasks._scheduled_tasks.data
-            eager_tasks = asyncio.tasks._eager_tasks
-        else:
-            scheduled_tasks = asyncio.tasks._all_tasks.data
-            eager_tasks = None
-
-        stack_v2.init_asyncio(asyncio.tasks._current_tasks, scheduled_tasks, eager_tasks)
+        _call_init_asyncio(asyncio)
 
 
 def get_event_loop_for_thread(thread_id: int) -> typing.Union["asyncio.AbstractEventLoop", None]:
diff --git a/ddtrace/profiling/collector/threading.py b/ddtrace/profiling/collector/threading.py
@@ -68,3 +68,8 @@ def thread_bootstrap_inner(self, *args, **kwargs):
         # Instrument any living threads
         for thread_id, thread in ddtrace_threading._active.items():  # type: ignore[attr-defined]
             stack_v2.register_thread(thread_id, thread.native_id, thread.name)
+
+        # Import _asyncio to ensure asyncio post-import wrappers are initialised
+        from ddtrace.profiling import _asyncio  # noqa: F401
+
+        _asyncio.link_existing_loop_to_current_thread()
diff --git a/releasenotes/notes/profiling-fix-untracked-existing-event-loop-6842ff15328dae9c.yaml b/releasenotes/notes/profiling-fix-untracked-existing-event-loop-6842ff15328dae9c.yaml
@@ -0,0 +1,4 @@
+fixes:
+  - |
+    profiling: this fix resolves an issue where importing the profiler module after an asyncio Event Loop had been
+    started would make the Profiler blind to the existing Event Loop and its Tasks.
diff --git a/tests/profiling_v2/collector/test_stack_asyncio.py b/tests/profiling_v2/collector/test_stack_asyncio.py

-Original file line number
+Diff line change
@@ @@ -0,0 +1,4 @@ @@
 +fixes:
 +  - |
 +    profiling: this fix resolves an issue where importing the profiler module after an asyncio Event Loop had been
 +    started would make the Profiler blind to the existing Event Loop and its Tasks.