We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent c3e2978, commit 100b3fd (Copy full SHA for 100b3fd)
vllm/config/compilation.py
@@ -917,7 +917,7 @@ def adjust_cudagraph_sizes_for_spec_decode(
917
self, uniform_decode_query_len: int, tensor_parallel_size: int
918
):
919
multiple_of = uniform_decode_query_len
920
- if tensor_parallel_size > 1:
+ if tensor_parallel_size > 1 and self.pass_config.enable_sequence_parallelism:
921
multiple_of = max(uniform_decode_query_len, tensor_parallel_size)
922
if (
923
multiple_of % uniform_decode_query_len != 0
0 commit comments