We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 22e44ad commit 8f4f77a Copy full SHA for 8f4f77a
vllm/config/compilation.py
@@ -921,7 +921,7 @@ def adjust_cudagraph_sizes_for_spec_decode(
921
self, uniform_decode_query_len: int, tensor_parallel_size: int
922
):
923
multiple_of = uniform_decode_query_len
924
- if tensor_parallel_size > 1:
+ if tensor_parallel_size > 1 and self.pass_config.enable_sequence_parallelism:
925
multiple_of = max(uniform_decode_query_len, tensor_parallel_size)
926
if (
927
multiple_of % uniform_decode_query_len != 0
0 commit comments