vllm_is_batch_invariant
1 parent f31419e commit ac0bb2c
vllm/model_executor/layers/batch_invariant.py
@@ -4,6 +4,7 @@
 import os
 from collections import namedtuple
 from collections.abc import Callable
+from functools import cache
 from typing import Any

 import torch
@@ -857,6 +858,7 @@ def get_batch_invariant_attention_block_size() -> AttentionBlockSize:
     return AttentionBlockSize(block_m=16, block_n=16)


+@cache
 def vllm_is_batch_invariant():
     env_key = "VLLM_BATCH_INVARIANT"
     is_overridden = False
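The commit memoizes vllm_is_batch_invariant() with functools.cache so the VLLM_BATCH_INVARIANT environment variable is inspected only once per process instead of on every call. Below is a minimal sketch of that cached env-flag pattern; the full function body is not shown in the hunk, so the parsing logic here is an assumption for illustration, not the actual vLLM implementation.

# Minimal sketch of the cached env-flag pattern applied in this commit.
# The real vllm_is_batch_invariant() body is only partially visible above;
# the value parsing below is a hypothetical stand-in.
import os
from functools import cache


@cache
def vllm_is_batch_invariant() -> bool:
    # functools.cache memoizes the result: the environment variable is read
    # and parsed on the first call only, and every later call returns the
    # cached boolean without touching os.environ again.
    env_key = "VLLM_BATCH_INVARIANT"
    value = os.getenv(env_key, "0")
    return value.strip().lower() in ("1", "true", "yes")

Because the decorated function takes no arguments, the cache holds a single entry; changing the environment variable after the first call therefore has no effect for the rest of the process, which is the usual trade-off of caching this kind of flag.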