File tree Expand file tree Collapse file tree 2 files changed +8
-0
lines changed Expand file tree Collapse file tree 2 files changed +8
-0
lines changed Original file line number Diff line number Diff line change 112112 VLLM_ROCM_USE_AITER_FP8BMM : bool = True
113113 VLLM_ROCM_USE_AITER_UNIFIED_ATTENTION : bool = False
114114 VLLM_ROCM_USE_AITER_FUSION_SHARED_EXPERTS : bool = True
115+ VLLM_ROCM_USE_AITER_TRITON_GEMM : bool = True
115116 VLLM_ROCM_USE_SKINNY_GEMM : bool = True
116117 VLLM_ROCM_FP8_PADDING : bool = True
117118 VLLM_ROCM_MOE_PADDING : bool = True
@@ -938,6 +939,12 @@ def get_vllm_port() -> int | None:
938939 os .getenv ("VLLM_ROCM_USE_AITER_FUSION_SHARED_EXPERTS" , "True" ).lower ()
939940 in ("true" , "1" )
940941 ),
942+ # Whether to use AITER Triton kernels for GEMM ops.
943+ # Enabled by default.
944+ "VLLM_ROCM_USE_AITER_TRITON_GEMM" : lambda : (
945+ os .getenv ("VLLM_ROCM_USE_AITER_TRITON_GEMM" , "True" ).lower ()
946+ in ("true" , "1" )
947+ ),
941948 # use rocm skinny gemms
942949 "VLLM_ROCM_USE_SKINNY_GEMM" : lambda : (
943950 os .getenv ("VLLM_ROCM_USE_SKINNY_GEMM" , "True" ).lower () in ("true" , "1" )
Original file line number Diff line number Diff line change @@ -106,6 +106,7 @@ def default_unquantized_gemm(
106106def use_aiter_triton_gemm (n , m , k , dtype ):
107107 if (
108108 envs .VLLM_ROCM_USE_AITER == 0
109+ or envs .VLLM_ROCM_USE_AITER_TRITON_GEMM == 0
109110 # MI300's - fp8nuz=True
110111 or current_platform .is_fp8_fnuz ()
111112 or dtype not in [torch .float16 , torch .bfloat16 ]
You can’t perform that action at this time.
0 commit comments