vllm-project · kylesayrs · Nov 17, 2025 · Nov 17, 2025 · Nov 17, 2025 · Nov 17, 2025
diff --git a/src/llmcompressor/modeling/qwen3_vl_moe.py b/src/llmcompressor/modeling/qwen3_vl_moe.py
@@ -1,12 +1,18 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
 import torch
-from transformers import Qwen3VLMoeConfig, Qwen3VLMoeTextConfig
-from transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe import (
-    Qwen3VLMoeTextSparseMoeBlock as OriginalQwen3VLMoeTextSparseMoeBlock,
-)
 
 from llmcompressor.modeling.moe_context import MoECalibrationModule
 from llmcompressor.utils.dev import skip_weights_initialize
 
+if TYPE_CHECKING:
+    from transformers import Qwen3VLMoeConfig, Qwen3VLMoeTextConfig
+    from transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe import (
+        Qwen3VLMoeTextSparseMoeBlock,
+    )
+
 
 @MoECalibrationModule.register("Qwen3VLMoeTextSparseMoeBlock")
 class CalibrateQwen3VLMoeTextSparseMoeBlock(MoECalibrationModule):
@@ -19,7 +25,7 @@ class CalibrateQwen3VLMoeTextSparseMoeBlock(MoECalibrationModule):
 
     def __init__(
         self,
-        original: OriginalQwen3VLMoeTextSparseMoeBlock,
+        original: Qwen3VLMoeTextSparseMoeBlock,
         config: Qwen3VLMoeConfig,
         calibrate_all_experts: bool,
     ):
@@ -116,7 +122,7 @@ def __init__(self, config, original):
 
 def replace(
     config: Qwen3VLMoeConfig,
-    original: OriginalQwen3VLMoeTextSparseMoeBlock,
+    original: Qwen3VLMoeTextSparseMoeBlock,
     calibrate_all_experts: bool,
 ):
     return CalibrateQwen3VLMoeTextSparseMoeBlock(

diff --git a/tests/llmcompressor/modeling/test_calib_qwen3_vl_moe.py b/tests/llmcompressor/modeling/test_calib_qwen3_vl_moe.py
@@ -1,14 +1,24 @@
+import pytest
 import torch
-from transformers import Qwen3VLMoeConfig
-from transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe import (
-    Qwen3VLMoeTextSparseMoeBlock,
-)
 
 from llmcompressor.modeling.qwen3_vl_moe import CalibrateQwen3VLMoeTextSparseMoeBlock
 from llmcompressor.utils.helpers import calibration_forward_context
 from tests.testing_utils import requires_gpu
 
+try:
+    from transformers import Qwen3VLMoeConfig
+    from transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe import (
+        Qwen3VLMoeTextSparseMoeBlock,
+    )
+except ImportError:
+    Qwen3VLMoeConfig = None
+    Qwen3VLMoeTextSparseMoeBlock = None
+
 
+@pytest.mark.skipif(
+    Qwen3VLMoeConfig is None,
+    reason="Qwen3VLMoe not available in this version of transformers",
+)
 @requires_gpu
 def test_calib_qwen3_vl_moe_module():
     config = Qwen3VLMoeConfig()