Skip to content

Commit 4faf918

Browse files
committed
Fix UT test and add AWQ model CI.
Signed-off-by: menogrey <1299267905@qq.com>
1 parent 0d49bbb commit 4faf918

File tree

3 files changed: 24 additions (+24) and 4 deletions (-4)

tests/e2e/singlecard/test_quantization.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,3 +33,16 @@ def test_quant_W8A8():
3333
quantization="ascend",
3434
) as vllm_model:
3535
vllm_model.generate_greedy(example_prompts, max_tokens)
36+
37+
def test_quant_awq():
38+
max_tokens = 5
39+
example_prompts = [
40+
"vLLM is a high-throughput and memory-efficient inference and serving engine for LLMs."
41+
]
42+
with VllmRunner(
43+
snapshot_download("Qwen/Qwen2.5-0.5B-Instruct-AWQ"),
44+
max_model_len=8192,
45+
enforce_eager=False,
46+
gpu_memory_utilization=0.7,
47+
) as vllm_model:
48+
vllm_model.generate_greedy(example_prompts, max_tokens)

tests/ut/quantization/test_quant_config.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -64,12 +64,18 @@ def test_from_config(self):
6464
def test_override_quantization_method(self, mock_is_available):
6565
# Test when NPU is available
6666
mock_is_available.return_value = True
67-
result = AscendQuantConfig.override_quantization_method(None, None)
67+
hf_quant_cfg = {}
68+
result = AscendQuantConfig.override_quantization_method(hf_quant_cfg, None)
6869
self.assertEqual(result, ASCEND_QUANTIZATION_METHOD)
6970

7071
# Test when NPU is not available
7172
mock_is_available.return_value = False
72-
result = AscendQuantConfig.override_quantization_method(None, None)
73+
result = AscendQuantConfig.override_quantization_method(hf_quant_cfg, None)
74+
self.assertIsNone(result)
75+
76+
# Test when quant_method is specified
77+
hf_quant_cfg = {"quant_method": "awq"}
78+
result = AscendQuantConfig.override_quantization_method(hf_quant_cfg, None)
7379
self.assertIsNone(result)
7480

7581
def test_get_quant_method_for_linear(self):

tests/ut/test_platform.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,8 @@
99

1010
from tests.ut.base import TestBase
1111
from vllm_ascend.platform import NPUPlatform
12-
from vllm_ascend.utils import ASCEND_QUANTIZATION_METHOD, AscendDeviceType
12+
from vllm_ascend.utils import (ASCEND_QUANTIZATION_METHOD, AWQ_QUANTIZATION_METHOD,
13+
AscendDeviceType)
1314

1415

1516
class TestNPUPlatform(TestBase):
@@ -48,7 +49,7 @@ def test_class_variables(self):
4849
"ASCEND_RT_VISIBLE_DEVICES")
4950
self.assertEqual(NPUPlatform.dispatch_key, "PrivateUse1")
5051
self.assertEqual(NPUPlatform.supported_quantization,
51-
[ASCEND_QUANTIZATION_METHOD])
52+
[ASCEND_QUANTIZATION_METHOD, AWQ_QUANTIZATION_METHOD])
5253

5354
def test_is_sleep_mode_available(self):
5455
self.assertTrue(self.platform.is_sleep_mode_available())

0 commit comments

Comments
 (0)