Fix unit tests and add an AWQ model CI test.

menogrey · menogrey · commit 4faf9181d644 · 2025-11-27T07:24:46.000Z
Signed-off-by: menogrey <1299267905@qq.com>
diff --git a/tests/e2e/singlecard/test_quantization.py b/tests/e2e/singlecard/test_quantization.py
@@ -33,3 +33,16 @@ def test_quant_W8A8():
             quantization="ascend",
     ) as vllm_model:
         vllm_model.generate_greedy(example_prompts, max_tokens)
+
+def test_quant_awq():
+    max_tokens = 5
+    example_prompts = [
+        "vLLM is a high-throughput and memory-efficient inference and serving engine for LLMs."
+    ]
+    with VllmRunner(
+            snapshot_download("Qwen/Qwen2.5-0.5B-Instruct-AWQ"),
+            max_model_len=8192,
+            enforce_eager=False,
+            gpu_memory_utilization=0.7,
+    ) as vllm_model:
+        vllm_model.generate_greedy(example_prompts, max_tokens)
diff --git a/tests/ut/quantization/test_quant_config.py b/tests/ut/quantization/test_quant_config.py
@@ -64,12 +64,18 @@ def test_from_config(self):
     def test_override_quantization_method(self, mock_is_available):
         # Test when NPU is available
         mock_is_available.return_value = True
-        result = AscendQuantConfig.override_quantization_method(None, None)
+        hf_quant_cfg = {}
+        result = AscendQuantConfig.override_quantization_method(hf_quant_cfg, None)
         self.assertEqual(result, ASCEND_QUANTIZATION_METHOD)
 
         # Test when NPU is not available
         mock_is_available.return_value = False
-        result = AscendQuantConfig.override_quantization_method(None, None)
+        result = AscendQuantConfig.override_quantization_method(hf_quant_cfg, None)
+        self.assertIsNone(result)
+
+        # Test when quant_method is specified
+        hf_quant_cfg = {"quant_method": "awq"}
+        result = AscendQuantConfig.override_quantization_method(hf_quant_cfg, None)
         self.assertIsNone(result)
 
     def test_get_quant_method_for_linear(self):
diff --git a/tests/ut/test_platform.py b/tests/ut/test_platform.py
@@ -9,7 +9,8 @@
 
 from tests.ut.base import TestBase
 from vllm_ascend.platform import NPUPlatform
-from vllm_ascend.utils import ASCEND_QUANTIZATION_METHOD, AscendDeviceType
+from vllm_ascend.utils import (ASCEND_QUANTIZATION_METHOD, AWQ_QUANTIZATION_METHOD,
+                               AscendDeviceType)
 
 
 class TestNPUPlatform(TestBase):
@@ -48,7 +49,7 @@ def test_class_variables(self):
                          "ASCEND_RT_VISIBLE_DEVICES")
         self.assertEqual(NPUPlatform.dispatch_key, "PrivateUse1")
         self.assertEqual(NPUPlatform.supported_quantization,
-                         [ASCEND_QUANTIZATION_METHOD])
+                         [ASCEND_QUANTIZATION_METHOD, AWQ_QUANTIZATION_METHOD])
 
     def test_is_sleep_mode_available(self):
         self.assertTrue(self.platform.is_sleep_mode_available())