Commit 5cc49fe

fix latest rebase from 7291cdc
Signed-off-by: John Calderon <jcalderon@nvidia.com>
1 parent fa24744

2 files changed: 4 additions, 5 deletions


tensorrt_llm/_torch/pyexecutor/_util.py
Lines changed: 1 addition & 2 deletions

@@ -416,8 +416,7 @@ def configure_kv_cache_capacity(self, py_executor: PyExecutor) -> None:
         # set max_gpu_total_bytes
         self._kv_cache_config.max_gpu_total_bytes = kv_cache_max_memory
         if isinstance(self._profiling_stage_data, dict):
-            self._profiling_stage_data[
-                "max_gpu_total_bytes"] = kv_cache_max_memory
+            self._profiling_stage_data["activation_bytes"] = activation_bytes
         # ---------------------------handle max_gpu_total_bytes---------------------------------

     def _create_kv_cache_manager(
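For context on this hunk: after the rebase, configure_kv_cache_capacity reports the measured activation footprint ("activation_bytes") into the caller-supplied profiling_stage_data dict instead of the KV-cache budget ("max_gpu_total_bytes"). A minimal sketch of how a caller might read that dict back, assuming only what the diff shows; the helper name and the GiB formatting are illustrative, not part of the TensorRT-LLM API:

    # Illustrative only: "report_activation_footprint" is a hypothetical helper.
    # The key "activation_bytes" is the one written by
    # configure_kv_cache_capacity after this commit.
    def report_activation_footprint(profiling_stage_data: dict) -> None:
        activation_bytes = profiling_stage_data.get("activation_bytes")
        if activation_bytes is None:
            print("profiling stage did not record activation memory")
            return
        print(f"activation memory: {activation_bytes / (1 << 30):.2f} GiB")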

tests/unittest/llmapi/test_memory_profiling.py
Lines changed: 3 additions & 3 deletions

@@ -62,7 +62,7 @@ def test_profile_kvcache():
     py_executor = create_py_executor(llm_args=torchllm_args,
                                      checkpoint_dir=VLM_MODEL_PATH,
                                      profiling_stage_data=profiling_data)
-    vlm_max_gpu_total_bytes_with_mm_reqs = profiling_data["max_gpu_total_bytes"]
+    vlm_activation_bytes_with_mm_reqs = profiling_data["activation_bytes"]
     py_executor.shutdown()
     torch.cuda.empty_cache()

@@ -71,8 +71,8 @@ def test_profile_kvcache():
     py_executor_2 = create_py_executor(llm_args=torchllm_args,
                                        checkpoint_dir=VLM_MODEL_PATH,
                                        profiling_stage_data=profiling_data)
-    vlm_max_gpu_total_bytes_no_mm_reqs = profiling_data["max_gpu_total_bytes"]
+    vlm_activation_bytes_no_mm_reqs = profiling_data["activation_bytes"]
     py_executor_2.shutdown()
     torch.cuda.empty_cache()

-    assert vlm_max_gpu_total_bytes_with_mm_reqs < vlm_max_gpu_total_bytes_no_mm_reqs, f"available KVCache for VLMs is expected to be less when profiling with mm reqs, but got {vlm_max_gpu_total_bytes_with_mm_reqs} for mm reqs and {vlm_max_gpu_total_bytes_no_mm_reqs} without mm reqs"
+    assert vlm_activation_bytes_with_mm_reqs > vlm_activation_bytes_no_mm_reqs, f"Activation bytes should be higher with mm reqs, but got {vlm_activation_bytes_with_mm_reqs} for mm reqs and {vlm_activation_bytes_no_mm_reqs} without mm reqs"
