Skip to content

Commit 4728418

Browse files
committed
Adding new MoE e2e tests [wip]
1 parent 6ff1118 commit 4728418

File tree

5 files changed

+29
-4
lines changed

5 files changed

+29
-4
lines changed

src/llmcompressor/transformers/compression/compressed_tensors_utils.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,8 +126,12 @@ def untie_word_embeddings(model: PreTrainedModel):
126126
127127
:param model: model to fix
128128
"""
129-
input_embed = model.get_input_embeddings()
130-
output_embed = model.get_output_embeddings()
129+
try:
130+
input_embed = model.get_input_embeddings()
131+
output_embed = model.get_output_embeddings()
132+
except NotImplementedError as e:
133+
logger.warning(f"cannot untie model of type {model.__class__}, which doesn't have get_input_embeddings and get_output_embeddings implemented\n{e}")
134+
return
131135

132136
for module in (input_embed, output_embed):
133137
if module is None or not hasattr(module, "weight"):
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
cadence: "nightly"
2+
test_type: "regression"
3+
model: Qwen/Qwen3-Omni-30B-A3B-Instruct
4+
model_class: "Qwen3OmniMoeForConditionalGeneration"
5+
scheme: NVFP4
6+
num_calibration_samples: 20
7+
dataset_id: HuggingFaceH4/ultrachat_200k
8+
dataset_split: train_sft
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
cadence: "nightly"
2+
test_type: "regression"
3+
model: Qwen/Qwen3-Omni-30B-A3B-Instruct
4+
model_class: "Qwen3OmniMoeForConditionalGeneration"
5+
scheme: FP8_DYNAMIC
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
cadence: "nightly"
2+
test_type: "regression"
3+
model: Qwen/Qwen3-Omni-30B-A3B-Instruct
4+
model_class: "Qwen3OmniMoeForConditionalGeneration"
5+
scheme: W4A16
6+
dataset_id: HuggingFaceH4/ultrachat_200k
7+
dataset_split: train_sft
8+
quant_type: "GPTQ"

tests/e2e/vLLM/test_vllm.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,8 @@ class TestvLLM:
5959
def set_up(self, test_data_file: str):
6060
eval_config = yaml.safe_load(Path(test_data_file).read_text(encoding="utf-8"))
6161

62-
if os.environ.get("CADENCE", "commit") != eval_config.get("cadence"):
63-
pytest.skip("Skipping test; cadence mismatch")
62+
# if os.environ.get("CADENCE", "commit") != eval_config.get("cadence"):
63+
# pytest.skip("Skipping test; cadence mismatch")
6464

6565
self.model = eval_config["model"]
6666
self.model_class = eval_config.get("model_class", "AutoModelForCausalLM")

0 commit comments

Comments (0)