[Core] GC Debug callback (vllm-project#24829)

Jialin · JialinOuyang-Meta · web-flow · commit c216119d649a · 2025-09-27T17:53:31.000Z
Signed-off-by: Jialin Ouyang &lt;jialino@meta.com&gt;
Signed-off-by: Jialin Ouyang &lt;Jialin.Ouyang@gmail.com&gt;
Co-authored-by: Jialin Ouyang &lt;jialino@meta.com&gt;
diff --git a/docs/contributing/profiling.md b/docs/contributing/profiling.md
@@ -208,3 +208,11 @@ One example is [snakeviz](https://jiffyclub.github.io/snakeviz/).
 pip install snakeviz
 snakeviz expensive_function.prof
 ```
+
+### Analyzing Garbage Collection Costs
+
+Leverage VLLM_GC_DEBUG environment variable to debug GC costs.
+
+- VLLM_GC_DEBUG=1: enable GC debugger with gc.collect elpased times
+- VLLM_GC_DEBUG='{"top_objects":5}': enable GC debugger to log top 5
+  collected objects for each gc.collect
diff --git a/tests/utils_/test_gc_utils.py b/tests/utils_/test_gc_utils.py
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from dataclasses import dataclass
+from typing import Any
+
+from vllm.utils.gc_utils import (GCDebugConfig, _compute_detailed_type,
+                                 _compute_top_gc_collected_objects)
+
+
+@dataclass
+class Normal:
+    v: int
+
+
+@dataclass
+class ListWrapper:
+    vs: list[int]
+
+    def __len__(self) -> int:
+        return len(self.vs)
+
+
+def test_compute_detailed_type():
+    assert _compute_detailed_type(
+        Normal(v=8)) == "<class 'tests.utils_.test_gc_utils.Normal'>"
+
+    assert _compute_detailed_type([1, 2, 3]) == "<class 'list'>(size:3)"
+    assert _compute_detailed_type({4, 5}) == "<class 'set'>(size:2)"
+    assert _compute_detailed_type({6: 7}) == "<class 'dict'>(size:1)"
+    assert _compute_detailed_type(ListWrapper(
+        vs=[])) == "<class 'tests.utils_.test_gc_utils.ListWrapper'>(size:0)"
+
+
+def test_compute_top_gc_collected_objects():
+    objects: list[Any] = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12],
+                          {13, 14}, {
+                              15: 16,
+                              17: 18
+                          },
+                          Normal(v=19),
+                          Normal(v=20),
+                          Normal(v=21)]
+    assert _compute_top_gc_collected_objects(objects, top=-1) == ""
+    assert _compute_top_gc_collected_objects(objects, top=0) == ""
+    assert _compute_top_gc_collected_objects(
+        objects, top=1) == "    4:<class 'list'>(size:3)"
+    assert _compute_top_gc_collected_objects(objects, top=2) == "\n".join([
+        "    4:<class 'list'>(size:3)",
+        "    3:<class 'tests.utils_.test_gc_utils.Normal'>"
+    ])
+    assert _compute_top_gc_collected_objects(objects, top=3) == "\n".join([
+        "    4:<class 'list'>(size:3)",
+        "    3:<class 'tests.utils_.test_gc_utils.Normal'>",
+        "    1:<class 'set'>(size:2)"
+    ])
+
+
+def test_gc_debug_config():
+    assert not GCDebugConfig(None).enabled
+    assert not GCDebugConfig("").enabled
+    assert not GCDebugConfig("0").enabled
+
+    config = GCDebugConfig("1")
+    assert config.enabled
+    assert config.top_objects == -1
+
+    config = GCDebugConfig("{\"top_objects\":5}")
+    assert config.enabled
+    assert config.top_objects == 5
diff --git a/vllm/envs.py b/vllm/envs.py
@@ -205,6 +205,7 @@
     VLLM_USE_NCCL_SYMM_MEM: bool = False
     VLLM_NCCL_INCLUDE_PATH: Optional[str] = None
     VLLM_USE_FBGEMM: bool = False
+    VLLM_GC_DEBUG: str = ""
 
 
 def get_default_cache_root():
@@ -1475,6 +1476,13 @@ def get_vllm_port() -> Optional[int]:
     lambda: os.environ.get("VLLM_NCCL_INCLUDE_PATH", None),
     # Flag to enable FBGemm kernels on model execution
     "VLLM_USE_FBGEMM": lambda: bool(int(os.getenv("VLLM_USE_FBGEMM", "0"))),
+
+    # GC debug config
+    # - VLLM_GC_DEBUG=0: disable GC debugger
+    # - VLLM_GC_DEBUG=1: enable GC debugger with gc.collect elpased times
+    # - VLLM_GC_DEBUG='{"top_objects":5}': enable GC debugger with
+    #                                      top 5 collected objects
+    "VLLM_GC_DEBUG": lambda: os.getenv("VLLM_GC_DEBUG", ""),
 }
 
 # --8<-- [end:env-vars-definition]
diff --git a/vllm/utils/gc_utils.py b/vllm/utils/gc_utils.py
@@ -0,0 +1,128 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import gc
+import json
+import time
+from collections import Counter
+from contextlib import suppress
+from typing import Any, Optional
+
+from vllm.envs import VLLM_GC_DEBUG
+from vllm.logger import init_logger
+
+logger = init_logger(__name__)
+
+
+class GCDebugConfig:
+    """
+    Config for GC Debugger.
+    - 0: disable GC debugger
+    - 1: enable GC debugger with gc.collect elpased times
+    - '{"top_objects":5}': enable GC debugger with top 5 collected objects
+    """
+
+    def __init__(self, gc_debug_conf: Optional[str] = None) -> None:
+        self.enabled: bool = False
+        self.top_objects: int = -1
+
+        if not gc_debug_conf or gc_debug_conf == "0":
+            pass
+        elif gc_debug_conf == "1":
+            self.enabled = True
+        else:
+            try:
+                json_conf = json.loads(gc_debug_conf)
+                self.enabled = True
+                self.top_objects = json_conf.get("top_objects", -1)
+            except Exception:
+                self.enabled = False
+                logger.error("Failed to parse VLLM_GC_DEBUG(%s)",
+                             VLLM_GC_DEBUG)
+        logger.info("GC Debug Config. %s", str(self))
+
+    def __repr__(self) -> str:
+        return f"enabled:{self.enabled},top_objects:{self.top_objects}"
+
+
+class GCDebugger:
+    """
+    Debugger for GC which logs helpful information for GC understanding.
+    To enable, you should call maybe_attach_gc_debug_callback in the process.
+    """
+
+    def __init__(self, config: GCDebugConfig) -> None:
+        self.config = config
+        # Start time in micro second of this GC cycle
+        self.start_time_ns: int = time.monotonic_ns()
+        # If config.top_objects is positive,
+        # compute top collected objects by object types
+        self.gc_top_collected_objects: str = ""
+
+    def handle(self, phase: str, info: dict[str, int]) -> None:
+        """
+        Handles a GC event (e.g. GC start or GC finish)
+        """
+        generation = info.get("generation")
+        if generation is None:
+            return
+        if phase == "start":
+            # Before GC started, record GC start time
+            # and top collected objects
+            self.start_time_ns = time.monotonic_ns()
+            self.gc_top_collected_objects = _compute_top_gc_collected_objects(
+                gc.get_objects(generation), self.config.top_objects)
+        elif phase == "stop":
+            # After GC finished, Record GC elapsed time and
+            # optionally top collected objects
+            elpased_ms = (time.monotonic_ns() - self.start_time_ns) / 1e6
+            logger.info(
+                "GC took %.3fms to complete. "
+                "Collected %s objects in GC generation %d.%s",
+                elpased_ms,
+                str(info.get("collected", "?")),
+                generation,
+                (f" Top collected objects: \n{self.gc_top_collected_objects}"
+                 if self.gc_top_collected_objects else ""),
+            )
+
+
+def maybe_attach_gc_debug_callback() -> None:
+    """
+    Attached a callback for GC debug when VLLM_GC_DEBUG is enabled.
+    """
+    config = GCDebugConfig(VLLM_GC_DEBUG)
+    if config.enabled:
+        debugger: GCDebugger = GCDebugger(config)
+
+        def gc_callback(phase: str, info: dict[str, int]) -> None:
+            debugger.handle(phase, info)
+
+        gc.callbacks.append(gc_callback)
+
+
+def _compute_detailed_type(o: Any) -> str:
+    """
+    Detailed object type.
+
+    TODO(Jialin): Further enhance the detailed type with element types for
+    easier debugging. We tried but occasionally it would run into signals
+    which kills the engine.
+    """
+    size_str: str = ""
+    # Object doesn't support len() - this can happen with type objects
+    # or other objects that don't implement __len__ properly
+    with suppress(Exception):
+        size_str = f"(size:{len(o)})"
+    return f"{str(type(o))}{size_str}"
+
+
+def _compute_top_gc_collected_objects(objects: list[Any], top: int) -> str:
+    """
+    Group collected objects by types.
+    """
+    if top <= 0:
+        return ""
+    object_types = [_compute_detailed_type(o) for o in objects]
+    return "\n".join(
+        f"{count:>5}:{object_type}"
+        for object_type, count in Counter(object_types).most_common(top))
diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py
@@ -29,6 +29,7 @@
     maybe_register_config_serialize_by_value)
 from vllm.utils import (decorate_logs, get_hash_fn_by_name, make_zmq_socket,
                         resolve_obj_by_qualname, set_process_title)
+from vllm.utils.gc_utils import maybe_attach_gc_debug_callback
 from vllm.v1.core.kv_cache_utils import (BlockHash,
                                          generate_scheduler_kv_cache_config,
                                          get_kv_cache_configs,
@@ -532,6 +533,9 @@ def __init__(
         gc.collect()
         gc.freeze()
 
+        # If enable, attach GC debugger after static variable freeze.
+        maybe_attach_gc_debug_callback()
+
     @contextmanager
     def _perform_handshakes(
         self,