Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletions src/llmcompressor/modeling/qwen3_vl_moe.py
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,18 @@
from __future__ import annotations

from typing import TYPE_CHECKING

import torch
from transformers import Qwen3VLMoeConfig, Qwen3VLMoeTextConfig
from transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe import (
Qwen3VLMoeTextSparseMoeBlock as OriginalQwen3VLMoeTextSparseMoeBlock,
)

from llmcompressor.modeling.moe_context import MoECalibrationModule
from llmcompressor.utils.dev import skip_weights_initialize

if TYPE_CHECKING:
from transformers import Qwen3VLMoeConfig, Qwen3VLMoeTextConfig
from transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe import (
Qwen3VLMoeTextSparseMoeBlock,
)


@MoECalibrationModule.register("Qwen3VLMoeTextSparseMoeBlock")
class CalibrateQwen3VLMoeTextSparseMoeBlock(MoECalibrationModule):
@@ -19,7 +25,7 @@ class CalibrateQwen3VLMoeTextSparseMoeBlock(MoECalibrationModule):

def __init__(
self,
original: OriginalQwen3VLMoeTextSparseMoeBlock,
original: Qwen3VLMoeTextSparseMoeBlock,
config: Qwen3VLMoeConfig,
calibrate_all_experts: bool,
):
@@ -116,7 +122,7 @@ def __init__(self, config, original):

def replace(
config: Qwen3VLMoeConfig,
original: OriginalQwen3VLMoeTextSparseMoeBlock,
original: Qwen3VLMoeTextSparseMoeBlock,
calibrate_all_experts: bool,
):
return CalibrateQwen3VLMoeTextSparseMoeBlock(
Expand Down
18 changes: 14 additions & 4 deletions tests/llmcompressor/modeling/test_calib_qwen3_vl_moe.py
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,24 @@
import pytest
import torch
from transformers import Qwen3VLMoeConfig
from transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe import (
Qwen3VLMoeTextSparseMoeBlock,
)

from llmcompressor.modeling.qwen3_vl_moe import CalibrateQwen3VLMoeTextSparseMoeBlock
from llmcompressor.utils.helpers import calibration_forward_context
from tests.testing_utils import requires_gpu

try:
from transformers import Qwen3VLMoeConfig
from transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe import (
Qwen3VLMoeTextSparseMoeBlock,
)
except ImportError:
Qwen3VLMoeConfig = None
Qwen3VLMoeTextSparseMoeBlock = None


@pytest.mark.skipif(
Qwen3VLMoeConfig is None,
reason="Qwen3VLMoe not available in this version of transformers",
)
@requires_gpu
def test_calib_qwen3_vl_moe_module():
config = Qwen3VLMoeConfig()
Expand Down
Loading