making tests run faster

HDCharles · HDCharles · commit a9d56db71be8 · 2025-11-05T18:36:14.000Z
Summary

Signed-off-by: HDCharles <charlesdavidhernandez@gmail.com>
diff --git a/tests/e2e/vLLM/configs/qwen3_fp4_nvfp4.yaml b/tests/e2e/vLLM/configs/qwen3_fp4_nvfp4.yaml
@@ -1,7 +1,9 @@
 cadence: "nightly"
 test_type: "regression"
 model: Qwen/Qwen3-30B-A3B
+
 scheme: NVFP4
-num_calibration_samples: 20
+
 dataset_id: HuggingFaceH4/ultrachat_200k
 dataset_split: train_sft
+num_calibration_samples: 20
diff --git a/tests/e2e/vLLM/configs/qwen3_fp8_dynamic_per_token.yaml b/tests/e2e/vLLM/configs/qwen3_fp8_dynamic_per_token.yaml
@@ -1,4 +1,5 @@
 cadence: "nightly"
 test_type: "regression"
 model: Qwen/Qwen3-30B-A3B
+
 scheme: FP8_DYNAMIC
diff --git a/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml b/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml
@@ -1,7 +1,9 @@
 cadence: "nightly"
 test_type: "regression"
 model: Qwen/Qwen3-30B-A3B
-scheme: W4A16
+
 dataset_id: HuggingFaceH4/ultrachat_200k
 dataset_split: train_sft
-quant_type: "GPTQ"
+num_calibration_samples: 20
+
+recipe: tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_first_20_layers.yaml
diff --git a/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_first_20_layers.yaml b/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_first_20_layers.yaml
@@ -0,0 +1,20 @@
+quant_stage:
+  quant_modifiers:
+    GPTQModifier:
+      ignore: [
+        "lm_head",
+        # Ignore layers 20-99: the pattern below only matches two-digit indices that start with 2-9.
+        "re:.*model\\.layers\\.([2-9][0-9])\\..*",  # "\\." is a literal dot once YAML unescapes it
+      ]
+      actorder: null
+      config_groups:
+        group_0:
+          weights:  # 4-bit integer weights, asymmetric, one group per 128 elements
+            num_bits: 4
+            type: "int"
+            symmetric: False
+            strategy: "group"
+            group_size: 128
+          input_activations: null  # NOTE(review): presumably leaves activations unquantized - confirm
+          output_activations: null
+          targets: ["Linear"]  # NOTE(review): presumably matched against module class names - confirm