Commit 3653f33
avoid mrope fusion op when running qwen2.5-vl on a+x machine (#4270)
### What this PR does / why we need it?
Avoid the fused mrope op when running Qwen2.5-VL on an A+X machine (x86 host CPU): on such hosts the custom `torch_npu.npu_mrope` kernel is skipped and the default rotary-embedding path is used instead.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Tested text VQA accuracy on G8600 with aisbench.

- vLLM version: v0.11.0
- vLLM main: vllm-project/vllm@2918c1b

Signed-off-by: 李少鹏 <lishaopeng21@huawei.com>
1 parent c848da0

2 files changed: +15 −4
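
For context before the per-file diffs: the change amounts to an architecture-gated dispatch. The fused `torch_npu.npu_mrope` kernel is taken only when the mrope section matches the known-good Qwen2.5-VL layout and the host CPU is not x86. A minimal sketch of that predicate, condensed from the `forward_oot` diff below (the helper name is ours, not part of the patch):

```python
# Sketch of the gating this PR adds; simplified from
# MRotaryEmbedding.forward_oot in vllm_ascend/ops/rotary_embedding.py.
from vllm.platforms import CpuArchEnum

from vllm_ascend.platform import NPUPlatform


def should_use_fused_mrope(mrope_section: list[int]) -> bool:
    """Return True when the fused npu_mrope kernel may be used."""
    # Only the Qwen2.5-VL section layout is supported by the fused op.
    if mrope_section != [16, 24, 24]:
        return False
    # Skipped on x86 hosts (A+X machines) until the mrope precision
    # issue is fixed (see the TODO in the diff below).
    if NPUPlatform.get_cpu_architecture() == CpuArchEnum.X86:
        return False
    return True
```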

tests/ut/ops/test_rotary_embedding.py

Lines changed: 9 additions & 2 deletions
```diff
@@ -7,6 +7,7 @@
 from vllm.config import ModelConfig, VllmConfig
 from vllm.model_executor.layers.rotary_embedding import (
     DeepseekScalingRotaryEmbedding, MRotaryEmbedding, RotaryEmbedding)
+from vllm.platforms import CpuArchEnum

 from tests.ut.base import TestBase
 from vllm_ascend.ascend_forward_context import set_ascend_forward_context
@@ -424,11 +425,14 @@ def _create_vllm_config(self):
         return vllm_config

     @patch('torch_npu.npu_mrope')
+    @patch('vllm_ascend.platform.NPUPlatform.get_cpu_architecture')
     @patch('vllm.config.ModelConfig.__post_init__', MagicMock())
     @patch('vllm.config.VllmConfig.__post_init__', MagicMock())
     @patch('vllm.distributed.parallel_state._DP', MagicMock(world_size=1))
     @patch('vllm.distributed.parallel_state._TP', MagicMock(world_size=1))
-    def test_forward_oot_1d_positions(self, mock_npu_mrope):
+    def test_forward_oot_1d_positions(self, mock_cpu_arc, mock_npu_mrope):
+        mock_cpu_arc.return_value = CpuArchEnum.ARM
+
         mock_npu_mrope.return_value = (torch.zeros_like(self.query),
                                        torch.zeros_like(self.key))

@@ -443,11 +447,14 @@ def test_forward_oot_1d_positions(self, mock_npu_mrope):
         self.assertEqual(result_q.shape, self.query.shape)

     @patch('torch_npu.npu_mrope')
+    @patch('vllm_ascend.platform.NPUPlatform.get_cpu_architecture')
     @patch('vllm.config.ModelConfig.__post_init__', MagicMock())
     @patch('vllm.config.VllmConfig.__post_init__', MagicMock())
     @patch('vllm.distributed.parallel_state._DP', MagicMock(world_size=1))
     @patch('vllm.distributed.parallel_state._TP', MagicMock(world_size=1))
-    def test_forward_oot_2d_positions(self, mock_npu_mrope):
+    def test_forward_oot_2d_positions(self, mock_cpu_arc, mock_npu_mrope):
+        mock_cpu_arc.return_value = CpuArchEnum.ARM
+
         mock_npu_mrope.return_value = (torch.zeros_like(self.query),
                                        torch.zeros_like(self.key))
```

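A note on the updated test signatures: stacked `@patch` decorators inject mocks bottom-up, which is why `mock_cpu_arc` (from the `get_cpu_architecture` patch, the injecting decorator closest to the function) precedes `mock_npu_mrope`; the `@patch(..., MagicMock())` decorators supply an explicit replacement and inject nothing. The tests pin the architecture to `CpuArchEnum.ARM` so the fused path stays active under test. A self-contained illustration of the ordering rule (hypothetical patch targets, not from this repo):

```python
# Demonstrates unittest.mock's bottom-up injection order for stacked
# @patch decorators, mirroring the decorator/argument order above.
from unittest.mock import patch


@patch('os.getcwd')   # farther from the function: injected second
@patch('os.getpid')   # closest to the function: injected first
def demo(mock_getpid, mock_getcwd):
    import os
    mock_getpid.return_value = 42
    mock_getcwd.return_value = '/tmp'
    assert os.getpid() == 42
    assert os.getcwd() == '/tmp'


demo()
```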
vllm_ascend/ops/rotary_embedding.py

Lines changed: 6 additions & 2 deletions
```diff
@@ -24,6 +24,7 @@
 from vllm.model_executor.layers.rotary_embedding import (
     DeepseekScalingRotaryEmbedding, MRotaryEmbedding, RotaryEmbedding,
     YaRNScalingRotaryEmbedding)
+from vllm.platforms import CpuArchEnum

 from vllm_ascend.platform import NPUPlatform
 from vllm_ascend.utils import enable_custom_op, is_310p
@@ -405,7 +406,10 @@ def forward_oot(
         query: torch.Tensor,
         key: torch.Tensor,
     ):
-        if self.mrope_section != [16, 24, 24]:
+        # TODO: This judgment will be removed once the mrope precision issue is fixed
+        if self.mrope_section != [
+                16, 24, 24
+        ] or NPUPlatform.get_cpu_architecture() == CpuArchEnum.X86:
             return super().forward_oot(positions, query, key)

         import torch_npu
@@ -428,4 +432,4 @@ def forward_oot(
             mrope_section=mrope_section,
             rotary_mode='half')

-        return query, key
+        return query, key
```
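
In practice the new check means that on an A+X host `forward_oot` now defers to the base `RotaryEmbedding` path even for the `[16, 24, 24]` section layout. A quick, hypothetical smoke check (names taken from the diff; assumes a vllm-ascend install) to confirm which path a given host will take:

```python
# Hypothetical smoke check for the dispatch added in this PR.
from vllm.platforms import CpuArchEnum

from vllm_ascend.platform import NPUPlatform

arch = NPUPlatform.get_cpu_architecture()
if arch == CpuArchEnum.X86:
    print("A+X host: fused npu_mrope is skipped; default path is used")
else:
    print(f"{arch}: fused npu_mrope eligible for mrope_section [16, 24, 24]")
```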
