Combine events from different worker connectors

hickeyma · hickeyma · commit 6487bda20645 · 2025-11-18T14:47:40.000Z
This is part of the aggregation of kv_connector_output from
all workers. For KV cache events, this means combining the events
from all workers and removing any duplicates.

Signed-off-by: Martin Hickey &lt;martin.hickey@ie.ibm.com&gt;
diff --git a/vllm/distributed/kv_events.py b/vllm/distributed/kv_events.py
@@ -54,11 +54,30 @@ class BlockStored(KVCacheEvent):
     lora_id: int | None
     medium: str | None
 
+    def __eq__(self, other):
+        if isinstance(other, BlockStored):
+            return (
+                self.block_hashes == other.block_hashes
+                and self.parent_block_hash == other.parent_block_hash
+                and self.token_ids == other.token_ids
+                and self.block_size == other.block_size
+                and self.lora_id == other.lora_id
+                and self.medium == other.medium
+            )
+        return False
+
 
 class BlockRemoved(KVCacheEvent):
     block_hashes: list[ExternalBlockHash]
     medium: str | None
 
+    def __eq__(self, other):
+        if isinstance(other, BlockRemoved):
+            return (
+                self.block_hashes == other.block_hashes and self.medium == other.medium
+            )
+        return False
+
 
 class AllBlocksCleared(KVCacheEvent):
     pass
@@ -67,6 +86,17 @@ class AllBlocksCleared(KVCacheEvent):
 class KVEventBatch(EventBatch):
     events: list[BlockStored | BlockRemoved | AllBlocksCleared]
 
+    def combine_unique_ordered_events(self, other: "KVEventBatch") -> "KVEventBatch":
+        """
+        Combine non duplicated events with another `KVEventBatch` object.
+        """
+        combined_events = self.events[:]
+        for item in other.events:
+            if item not in combined_events:
+                combined_events.append(item)
+        self.events = combined_events
+        return self
+
 
 class EventPublisher(ABC):
     """Lightweight publisher for EventBatch batches with data parallelism
diff --git a/vllm/distributed/kv_transfer/kv_connector/utils.py b/vllm/distributed/kv_transfer/kv_connector/utils.py
@@ -160,6 +160,7 @@ def update_finished_set(
         finished_sending = set[str]()
         finished_recving = set[str]()
         aggregated_kv_connector_stats = None
+        combined_kv_cache_events = None
         invalid_block_ids = set[int]()
         for model_runner_output in outputs:
             assert model_runner_output is not None
@@ -201,6 +202,21 @@ def update_finished_set(
                         aggregated_kv_connector_stats.aggregate(kv_connector_stats)
                     )
 
+            # Combine kv_cache_events from all workers.
+            if combined_kv_cache_events is None:
+                # Use the first worker's kv_cache events as start event list.
+                combined_kv_cache_events = kv_output.kv_cache_events
+            elif kv_cache_events := kv_output.kv_cache_events:
+                assert isinstance(
+                    combined_kv_cache_events,
+                    type(kv_cache_events),
+                )
+                combined_kv_cache_events = (
+                    combined_kv_cache_events.combine_unique_ordered_events(
+                        kv_cache_events
+                    )
+                )
+
             invalid_block_ids |= kv_output.invalid_block_ids
 
         # select output of the worker specified by output_rank
@@ -211,6 +227,7 @@ def update_finished_set(
             finished_sending=finished_sending or None,
             finished_recving=finished_recving or None,
             kv_connector_stats=aggregated_kv_connector_stats or None,
+            kv_connector_kv_cache_events=combined_kv_cache_events or None,
             invalid_block_ids=invalid_block_ids,
             expected_finished_count=self._expected_finished_count,
         )