Skip to content

Commit c87094f

Browse files
committed
Add env to enable/disable aiter triton gemm
Signed-off-by: Yong Hoon Shin <yhshin@meta.com>
1 parent d0ceb38 commit c87094f

File tree

2 files changed

+8
-0
lines changed

2 files changed

+8
-0
lines changed

vllm/envs.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@
112112
VLLM_ROCM_USE_AITER_FP8BMM: bool = True
113113
VLLM_ROCM_USE_AITER_UNIFIED_ATTENTION: bool = False
114114
VLLM_ROCM_USE_AITER_FUSION_SHARED_EXPERTS: bool = True
115+
VLLM_ROCM_USE_AITER_TRITON_GEMM: bool = True
115116
VLLM_ROCM_USE_SKINNY_GEMM: bool = True
116117
VLLM_ROCM_FP8_PADDING: bool = True
117118
VLLM_ROCM_MOE_PADDING: bool = True
@@ -938,6 +939,12 @@ def get_vllm_port() -> int | None:
938939
os.getenv("VLLM_ROCM_USE_AITER_FUSION_SHARED_EXPERTS", "True").lower()
939940
in ("true", "1")
940941
),
942+
# Whether to use aiter triton kernels for gemm ops.
943+
# Enabled by default.
944+
"VLLM_ROCM_USE_AITER_TRITON_GEMM": lambda: (
945+
os.getenv("VLLM_ROCM_USE_AITER_TRITON_GEMM", "True").lower()
946+
in ("true", "1")
947+
),
941948
# use rocm skinny gemms
942949
"VLLM_ROCM_USE_SKINNY_GEMM": lambda: (
943950
os.getenv("VLLM_ROCM_USE_SKINNY_GEMM", "True").lower() in ("true", "1")

vllm/model_executor/layers/utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ def default_unquantized_gemm(
106106
def use_aiter_triton_gemm(n, m, k, dtype):
107107
if (
108108
envs.VLLM_ROCM_USE_AITER == 0
109+
or envs.VLLM_ROCM_USE_AITER_TRITON_GEMM == 0
109110
# MI300's - fp8nuz=True
110111
or current_platform.is_fp8_fnuz()
111112
or dtype not in [torch.float16, torch.bfloat16]

0 commit comments

Comments
 (0)