Skip to content

Commit 4728418

Browse files
committed
Adding new MoE e2e tests [wip]
1 parent 6ff1118 commit 4728418

File tree

5 files changed

+29
-4
lines changed

5 files changed

+29
-4
lines changed

src/llmcompressor/transformers/compression/compressed_tensors_utils.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,8 +126,12 @@ def untie_word_embeddings(model: PreTrainedModel):
126126
127127
:param model: model to fix
128128
"""
129-
input_embed = model.get_input_embeddings()
130-
output_embed = model.get_output_embeddings()
129+
try:
130+
input_embed = model.get_input_embeddings()
131+
output_embed = model.get_output_embeddings()
132+
except NotImplementedError as e:
133+
logger.warning(f"cannot untie model of type {model.__class__}, which doesn't have get_input_embeddings and get_output_embeddings implemented\n{e}")
134+
return
131135

132136
for module in (input_embed, output_embed):
133137
if module is None or not hasattr(module, "weight"):
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
cadence: "nightly"
2+
test_type: "regression"
3+
model: Qwen/Qwen3-Omni-30B-A3B-Instruct
4+
model_class: "Qwen3OmniMoeForConditionalGeneration"
5+
scheme: NVFP4
6+
num_calibration_samples: 20
7+
dataset_id: HuggingFaceH4/ultrachat_200k
8+
dataset_split: train_sft
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
cadence: "nightly"
2+
test_type: "regression"
3+
model: Qwen/Qwen3-Omni-30B-A3B-Instruct
4+
model_class: "Qwen3OmniMoeForConditionalGeneration"
5+
scheme: FP8_DYNAMIC
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
cadence: "nightly"
2+
test_type: "regression"
3+
model: Qwen/Qwen3-Omni-30B-A3B-Instruct
4+
model_class: "Qwen3OmniMoeForConditionalGeneration"
5+
scheme: W4A16
6+
dataset_id: HuggingFaceH4/ultrachat_200k
7+
dataset_split: train_sft
8+
quant_type: "GPTQ"

tests/e2e/vLLM/test_vllm.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,8 @@ class TestvLLM:
5959
def set_up(self, test_data_file: str):
6060
eval_config = yaml.safe_load(Path(test_data_file).read_text(encoding="utf-8"))
6161

62-
if os.environ.get("CADENCE", "commit") != eval_config.get("cadence"):
63-
pytest.skip("Skipping test; cadence mismatch")
62+
# if os.environ.get("CADENCE", "commit") != eval_config.get("cadence"):
63+
# pytest.skip("Skipping test; cadence mismatch")
6464

6565
self.model = eval_config["model"]
6666
self.model_class = eval_config.get("model_class", "AutoModelForCausalLM")

0 commit comments

Comments (0)