File tree Expand file tree Collapse file tree 3 files changed +24
-4
lines changed Expand file tree Collapse file tree 3 files changed +24
-4
lines changed Original file line number Diff line number Diff line change @@ -33,3 +33,16 @@ def test_quant_W8A8():
3333 quantization = "ascend" ,
3434 ) as vllm_model :
3535 vllm_model .generate_greedy (example_prompts , max_tokens )
36+
def test_quant_awq():
    """Smoke-test greedy generation with an AWQ-quantized Qwen2.5 checkpoint.

    The test only checks that the model loads and generates without raising;
    the generated text itself is not inspected.
    """
    prompts = [
        "vLLM is a high-throughput and memory-efficient inference and serving engine for LLMs."
    ]
    token_budget = 5
    # Resolve the checkpoint first so the runner receives a local model path.
    model_path = snapshot_download("Qwen/Qwen2.5-0.5B-Instruct-AWQ")
    runner_kwargs = dict(
        max_model_len=8192,
        enforce_eager=False,
        gpu_memory_utilization=0.7,
    )
    with VllmRunner(model_path, **runner_kwargs) as runner:
        runner.generate_greedy(prompts, token_budget)
Original file line number Diff line number Diff line change @@ -64,12 +64,18 @@ def test_from_config(self):
def test_override_quantization_method(self, mock_is_available):
    """Check when the Ascend method overrides the checkpoint's quantization.

    ``mock_is_available`` is the mock passed into this test (the patch that
    supplies it is outside this view); its ``return_value`` is used here to
    simulate whether an NPU is available.
    """
    # NPU available and no quant_method in the HF config: Ascend overrides.
    mock_is_available.return_value = True
    hf_quant_cfg = {}
    result = AscendQuantConfig.override_quantization_method(hf_quant_cfg, None)
    self.assertEqual(result, ASCEND_QUANTIZATION_METHOD)

    # NPU not available: no override.
    mock_is_available.return_value = False
    result = AscendQuantConfig.override_quantization_method(hf_quant_cfg, None)
    self.assertIsNone(result)

    # quant_method specified in the HF config: no override.
    # Make the NPU available again first; otherwise None is already returned
    # for the empty config above and this case would pass without ever
    # exercising the quant_method check.
    mock_is_available.return_value = True
    hf_quant_cfg = {"quant_method": "awq"}
    result = AscendQuantConfig.override_quantization_method(hf_quant_cfg, None)
    self.assertIsNone(result)
7480
7581 def test_get_quant_method_for_linear (self ):
Original file line number Diff line number Diff line change 99
1010from tests .ut .base import TestBase
1111from vllm_ascend .platform import NPUPlatform
12- from vllm_ascend .utils import ASCEND_QUANTIZATION_METHOD , AscendDeviceType
12+ from vllm_ascend .utils import (ASCEND_QUANTIZATION_METHOD , AWQ_QUANTIZATION_METHOD ,
13+ AscendDeviceType )
1314
1415
1516class TestNPUPlatform (TestBase ):
@@ -48,7 +49,7 @@ def test_class_variables(self):
4849 "ASCEND_RT_VISIBLE_DEVICES" )
4950 self .assertEqual (NPUPlatform .dispatch_key , "PrivateUse1" )
5051 self .assertEqual (NPUPlatform .supported_quantization ,
51- [ASCEND_QUANTIZATION_METHOD ])
52+ [ASCEND_QUANTIZATION_METHOD , AWQ_QUANTIZATION_METHOD ])
5253
5354 def test_is_sleep_mode_available (self ):
5455 self .assertTrue (self .platform .is_sleep_mode_available ())
You can’t perform that action at this time.
0 commit comments