191 changes: 191 additions & 0 deletions conversion/__init__.py
@@ -0,0 +1,191 @@
from __future__ import annotations
from .base import (
ModelBase, TextModel, MmprojModel, ModelType, SentencePieceTokenTypes,
logger, _mistral_common_installed, _mistral_import_error_msg,
get_model_architecture, LazyTorchTensor
)
from typing import TYPE_CHECKING
if TYPE_CHECKING:
    from torch import Tensor  # type: ignore # noqa: F401

__all__ = [
"ModelBase", "TextModel", "MmprojModel", "ModelType", "SentencePieceTokenTypes",
"get_model_architecture", "LazyTorchTensor", "logger",
"_mistral_common_installed", "_mistral_import_error_msg"
]
# List of all model module names (used for lazy loading)
_MODEL_MODULES = [
'gpt_neox', 'bloom', 'mpt', 'orion', 'baichuan', 'xverse', 'falcon', 'starcoder',
'refact', 'stable_lm', 'llama', 'arcee', 'mistral', 'mistral3', 'deci', 'bitnet', 'grok',
'dbrx', 'minicpm', 'qwen', 'qwen_vl', 'qwen3_vl', 'dream', 'llada', 'ernie',
'intern_vision', 'wav_tokenizer', 'gpt2', 'phi', 'plamo', 'codeshell', 'internlm',
'bert', 'gemma', 'rwkv', 'mamba', 'jamba', 'command_r', 'olmo', 'openelm',
'arctic', 'deepseek', 'minimax', 'pangu', 'dots1', 'plm', 't5', 'jais', 'glm',
'chatglm', 'nemotron', 'exaone', 'granite', 'bailing', 'grove', 'chameleon',
'ultravox', 'falcon_h1', 'hunyuan', 'smollm', 'gpt_oss', 'lfm2', 'small_thinker',
'apertus', 'pixtral', 'lighton_ocr', 'kimi_vl', 'cogvlm', 'janus_pro', 'llama4',
'smolvlm'
]
# Track which modules have been loaded
_loaded_modules = set()


# Function to load all model modules
def _load_all_models():
    """Import all model modules to trigger registration."""
    if len(_loaded_modules) == len(_MODEL_MODULES):
        return  # Already loaded
    for module_name in _MODEL_MODULES:
        if module_name not in _loaded_modules:
            try:
                __import__(f"conversion.{module_name}")
                _loaded_modules.add(module_name)
            except Exception as e:
                # Log but don't fail - some models might have issues
                logger.warning(f"Failed to load model module {module_name}: {e}")
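For readers unfamiliar with the registration-by-import pattern the docstring refers to: importing each conversion.<module> runs its @ModelBase.register(...) class decorators, which populate a name-to-class registry. A minimal sketch of that mechanism follows; the real ModelBase.register lives in conversion/base.py (not part of this diff), so the body below is an assumption, not the actual implementation.

# Simplified stand-in for ModelBase.register; the real one is in conversion/base.py.
_registry: dict[str, type] = {}

class ModelBase:
    @classmethod
    def register(cls, *names: str):
        def wrap(model_cls: type) -> type:
            for name in names:
                _registry[name] = model_cls  # runs at import time of each model module
            return model_cls
        return wrap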


# Function to get a model class by name
def get_model_class(name: str, mmproj: bool = False):
    """
    Dynamically import and return a model class by name.
    This avoids circular dependencies by only importing when needed.
    """
    # Map model names to their module and class name
    model_map = {
# Text models
"LlamaModel": ("llama", "LlamaModel"),
"MistralModel": ("mistral", "MistralModel"),
"GPTNeoXModel": ("gpt_neox", "GPTNeoXModel"),
"BloomModel": ("bloom", "BloomModel"),
"MPTModel": ("mpt", "MPTModel"),
"OrionModel": ("orion", "OrionModel"),
"BaichuanModel": ("baichuan", "BaichuanModel"),
"XverseModel": ("xverse", "XverseModel"),
"FalconModel": ("falcon", "FalconModel"),
"StarCoderModel": ("starcoder", "StarCoderModel"),
"StarCoder2Model": ("starcoder", "StarCoder2Model"),
"RefactModel": ("refact", "RefactModel"),
"StableLMModel": ("stable_lm", "StableLMModel"),
"ArceeModel": ("arcee", "ArceeModel"),
"Mistral3Model": ("mistral3", "Mistral3Model"),
"DeciModel": ("deci", "DeciModel"),
"BitnetModel": ("bitnet", "BitnetModel"),
"GrokModel": ("grok", "GrokModel"),
"DbrxModel": ("dbrx", "DbrxModel"),
"MiniCPMModel": ("minicpm", "MiniCPMModel"),
"MiniCPM3Model": ("minicpm", "MiniCPM3Model"),
"QwenModel": ("qwen", "QwenModel"),
"Qwen2Model": ("qwen", "Qwen2Model"),
"Qwen2MoeModel": ("qwen", "Qwen2MoeModel"),
"Qwen3Model": ("qwen", "Qwen3Model"),
"Qwen3MoeModel": ("qwen", "Qwen3MoeModel"),
"Qwen25OmniModel": ("qwen_vl", "Qwen25OmniModel"),
"Qwen3VLTextModel": ("qwen3_vl", "Qwen3VLTextModel"),
"Qwen3VLMoeTextModel": ("qwen3_vl", "Qwen3VLMoeTextModel"),
"DreamModel": ("dream", "DreamModel"),
"LLaDAModel": ("llada", "LLaDAModel"),
"LLaDAMoEModel": ("llada", "LLaDAMoEModel"),
"Ernie4_5Model": ("ernie", "Ernie4_5Model"),
"Ernie4_5MoeModel": ("ernie", "Ernie4_5MoeModel"),
"InternVisionModel": ("intern_vision", "InternVisionModel"),
"WavTokenizerDecModel": ("wav_tokenizer", "WavTokenizerDecModel"),
"GPT2Model": ("gpt2", "GPT2Model"),
"Phi2Model": ("phi", "Phi2Model"),
"Phi3MiniModel": ("phi", "Phi3MiniModel"),
"PhiMoeModel": ("phi", "PhiMoeModel"),
"PlamoModel": ("plamo", "PlamoModel"),
"Plamo2Model": ("plamo", "Plamo2Model"),
"CodeShellModel": ("codeshell", "CodeShellModel"),
"InternLM2Model": ("internlm", "InternLM2Model"),
"InternLM3Model": ("internlm", "InternLM3Model"),
"BertModel": ("bert", "BertModel"),
"DistilBertModel": ("bert", "DistilBertModel"),
"RobertaModel": ("bert", "RobertaModel"),
"NomicBertModel": ("bert", "NomicBertModel"),
"NeoBert": ("bert", "NeoBert"),
"XLMRobertaModel": ("bert", "XLMRobertaModel"),
"JinaBertV2Model": ("bert", "JinaBertV2Model"),
"GemmaModel": ("gemma", "GemmaModel"),
"Gemma2Model": ("gemma", "Gemma2Model"),
"Gemma3Model": ("gemma", "Gemma3Model"),
"EmbeddingGemma": ("gemma", "EmbeddingGemma"),
"Gemma3NModel": ("gemma", "Gemma3NModel"),
"Rwkv6Model": ("rwkv", "Rwkv6Model"),
"RWKV6Qwen2Model": ("rwkv", "RWKV6Qwen2Model"),
"Rwkv7Model": ("rwkv", "Rwkv7Model"),
"ARwkv7Model": ("rwkv", "ARwkv7Model"),
"MambaModel": ("mamba", "MambaModel"),
"Mamba2Model": ("mamba", "Mamba2Model"),
"JambaModel": ("jamba", "JambaModel"),
"CommandR2Model": ("command_r", "CommandR2Model"),
"Cohere2Model": ("command_r", "Cohere2Model"),
"OlmoModel": ("olmo", "OlmoModel"),
"OlmoForCausalLM": ("olmo", "OlmoModel"),
"SeedOssModel": ("olmo", "SeedOssModel"),
"Olmo2Model": ("olmo", "Olmo2Model"),
"OlmoeModel": ("olmo", "OlmoeModel"),
"OpenELMModel": ("openelm", "OpenELMModel"),
"ArcticModel": ("arctic", "ArcticModel"),
"DeepseekModel": ("deepseek", "DeepseekModel"),
"DeepseekV2Model": ("deepseek", "DeepseekV2Model"),
"MiniMaxM2Model": ("minimax", "MiniMaxM2Model"),
"PanguEmbeddedModel": ("pangu", "PanguEmbeddedModel"),
"Dots1Model": ("dots1", "Dots1Model"),
"PLMModel": ("plm", "PLMModel"),
"T5Model": ("t5", "T5Model"),
"T5ForConditionalGeneration": ("t5", "T5Model"),
"T5WithLMHeadModel": ("t5", "T5Model"),
"T5EncoderModel": ("t5", "T5EncoderModel"),
"JaisModel": ("jais", "JaisModel"),
"Glm4Model": ("glm", "Glm4Model"),
"Glm4MoeModel": ("glm", "Glm4MoeModel"),
"ChatGLMModel": ("chatglm", "ChatGLMModel"),
"NemotronModel": ("nemotron", "NemotronModel"),
"NemotronHModel": ("nemotron", "NemotronHModel"),
"ExaoneModel": ("exaone", "ExaoneModel"),
"Exaone4Model": ("exaone", "Exaone4Model"),
"GraniteModel": ("granite", "GraniteModel"),
"GraniteMoeModel": ("granite", "GraniteMoeModel"),
"GraniteHybridModel": ("granite", "GraniteHybridModel"),
"BailingMoeModel": ("bailing", "BailingMoeModel"),
"BailingMoeV2Model": ("bailing", "BailingMoeV2Model"),
"GroveMoeModel": ("grove", "GroveMoeModel"),
"ChameleonModel": ("chameleon", "ChameleonModel"),
"HunYuanMoEModel": ("hunyuan", "HunYuanMoEModel"),
"HunYuanModel": ("hunyuan", "HunYuanModel"),
"SmolLM3Model": ("smollm", "SmolLM3Model"),
"GptOssModel": ("gpt_oss", "GptOssModel"),
"LFM2Model": ("lfm2", "LFM2Model"),
"LFM2MoeModel": ("lfm2", "LFM2MoeModel"),
"SmallThinkerModel": ("small_thinker", "SmallThinkerModel"),
"ApertusModel": ("apertus", "ApertusModel"),
"PixtralModel": ("pixtral", "PixtralModel"),
"LightOnOCRVisionModel": ("lighton_ocr", "LightOnOCRVisionModel"),
"KimiVLModel": ("kimi_vl", "KimiVLModel"),
"CogVLMModel": ("cogvlm", "CogVLMModel"),
"JanusProModel": ("janus_pro", "JanusProModel"),
# Multimodal models
"LlavaVisionModel": ("llava", "LlavaVisionModel"),
"SmolVLMModel": ("smolvlm", "SmolVLMModel"),
"Llama4Model": ("llama4", "Llama4Model"),
"Llama4VisionModel": ("llama4", "Llama4VisionModel"),
"Qwen2VLModel": ("qwen_vl", "Qwen2VLVisionModel"),
"Qwen2VLVisionModel": ("qwen_vl", "Qwen2VLVisionModel"),
"Qwen2_5_VLForConditionalGeneration": ("qwen_vl", "Qwen2VLVisionModel"),
"Qwen3VLVisionModel": ("qwen3_vl", "Qwen3VLVisionModel"),
"Gemma3VisionModel": ("gemma", "Gemma3VisionModel"),
"LFM2VLModel": ("lfm2", "LFM2VLModel"),
"UltravoxModel": ("ultravox", "UltravoxModel"),
"WhisperEncoderModel": ("ultravox", "WhisperEncoderModel"),
"UltravoxWhisperEncoderModel": ("ultravox", "UltravoxWhisperEncoderModel"),
"VoxtralWhisperEncoderModel": ("ultravox", "VoxtralWhisperEncoderModel"),
"FalconH1Model": ("falcon_h1", "FalconH1Model"),
"CogVLMVisionModel": ("cogvlm", "CogVLMVisionModel"),
"JanusProVisionModel": ("janus_pro", "JanusProVisionModel"),
}
    if name not in model_map:
        raise ValueError(f"Unknown model class: {name}")
    module_name, class_name = model_map[name]

Review comment (Collaborator): Since the class_name seems to always be equal to name, would it make sense to lighten the model_map to avoid having to write the class name twice every time? (See the sketch after this file's diff.)

    module = __import__(f"conversion.{module_name}", fromlist=[class_name])
    return getattr(module, class_name)
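To illustrate the review comment above, a minimal sketch of the suggested lightening (not part of the PR, names hypothetical): entries whose class name equals the lookup key store just the module name, while aliases such as "OlmoForCausalLM" or "T5ForConditionalGeneration" keep an explicit (module, class) pair.

# Hypothetical simplification sketched from the review comment; not part of this PR.
_MODEL_MAP = {
    "LlamaModel": "llama",                            # key == class name -> module only
    "MistralModel": "mistral",
    "OlmoForCausalLM": ("olmo", "OlmoModel"),         # alias: key != class name
    "T5ForConditionalGeneration": ("t5", "T5Model"),
}

def get_model_class(name: str):
    try:
        entry = _MODEL_MAP[name]
    except KeyError:
        raise ValueError(f"Unknown model class: {name}") from None
    # A bare string means the class name equals the lookup key.
    module_name, class_name = entry if isinstance(entry, tuple) else (entry, name)
    module = __import__(f"conversion.{module_name}", fromlist=[class_name])
    return getattr(module, class_name)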
43 changes: 43 additions & 0 deletions conversion/apertus.py
@@ -0,0 +1,43 @@
from __future__ import annotations
from .base import (
ModelBase, gguf
)
from typing import TYPE_CHECKING
if TYPE_CHECKING:
    pass
from .llama import LlamaModel


@ModelBase.register("ApertusForCausalLM")
class ApertusModel(LlamaModel):
model_arch = gguf.MODEL_ARCH.APERTUS
undo_permute = False
_alpha_n = {}
_alpha_p = {}
_beta = {}
_eps = {}

def modify_tensors(self, data_torch, name, bid):
# Handle xIELU activation parameters
n_layers = self.hparams["num_hidden_layers"]
if name.endswith(".act_fn.alpha_n"):
self._alpha_n[bid] = data_torch.to("cpu").float().item()
if (len(self._alpha_n) == n_layers):
self.gguf_writer.add_xielu_alpha_n([self._alpha_n[k] for k in sorted(self._alpha_n)])
return []
if name.endswith(".act_fn.alpha_p"):
self._alpha_p[bid] = data_torch.to("cpu").float().item()
if (len(self._alpha_p) == n_layers):
self.gguf_writer.add_xielu_alpha_p([self._alpha_p[k] for k in sorted(self._alpha_p)])
return []
if name.endswith(".act_fn.beta"):
self._beta[bid] = data_torch.to("cpu").float().item()
if (len(self._beta) == n_layers):
self.gguf_writer.add_xielu_beta([self._beta[k] for k in sorted(self._beta)])
return []
if name.endswith(".act_fn.eps"):
self._eps[bid] = data_torch.to("cpu").float().item()
if (len(self._eps) == n_layers):
self.gguf_writer.add_xielu_eps([self._eps[k] for k in sorted(self._eps)])
return []
return super().modify_tensors(data_torch, name, bid)
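A standalone sketch of the accumulation pattern used above (plain Python, hypothetical values): each per-layer scalar is stashed under its layer index, the tensor itself is dropped from the output, and the full list is emitted only once every layer has contributed, in layer order.

# Illustration of the per-layer scalar accumulation in ApertusModel.modify_tensors.
# Values are made up; the real code reads them from checkpoint tensors and
# forwards the sorted list to gguf_writer.add_xielu_alpha_n(...).
n_layers = 4
alpha_n: dict[int, float] = {}

def collect(bid: int, value: float) -> list:
    alpha_n[bid] = value
    if len(alpha_n) == n_layers:
        print([alpha_n[k] for k in sorted(alpha_n)])  # emit once, in layer order
    return []  # nothing is written for this tensor itself

for bid, value in [(2, 0.8), (0, 0.5), (3, 0.9), (1, 0.7)]:
    collect(bid, value)  # prints [0.5, 0.7, 0.8, 0.9] after the final layer arrives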
22 changes: 22 additions & 0 deletions conversion/arcee.py
@@ -0,0 +1,22 @@
from __future__ import annotations
from .base import (
ModelBase, gguf
)
from typing import TYPE_CHECKING
if TYPE_CHECKING:
    pass
from .llama import LlamaModel


@ModelBase.register("ArceeForCausalLM")
class ArceeModel(LlamaModel):
model_arch = gguf.MODEL_ARCH.ARCEE

def set_gguf_parameters(self):
super().set_gguf_parameters()
self._try_set_pooling_type()
rope_scaling = self.hparams.get("rope_scaling") or {}
if rope_scaling.get("rope_type", rope_scaling.get("type")) == "yarn" and "factor" in rope_scaling:
self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"])
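For reference, a hypothetical hparams fragment of the shape this branch expects (field names taken from the code above, values made up):

# Hypothetical rope_scaling entry that would take the YARN branch above.
hparams = {
    "rope_scaling": {
        "rope_type": "yarn",                       # legacy configs may use "type" instead
        "factor": 4.0,                             # -> add_rope_scaling_factor
        "original_max_position_embeddings": 8192,  # -> add_rope_scaling_orig_ctx_len
    }
}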
131 changes: 131 additions & 0 deletions conversion/arctic.py
@@ -0,0 +1,131 @@
from __future__ import annotations
import json
import sys
from .base import (
ModelBase, TextModel, SentencePieceTokenTypes,
gguf, torch, logger
)
from .llama import LlamaModel
from typing import TYPE_CHECKING, Iterable
if TYPE_CHECKING:
    from torch import Tensor


@ModelBase.register("ArcticForCausalLM")
class ArcticModel(TextModel):
model_arch = gguf.MODEL_ARCH.ARCTIC

def set_vocab(self):
# The reason for using a custom implementation here is that the
# snowflake-arctic-instruct model redefined tokens 31998 and 31999 from
# tokenizer.model and used them as BOS and EOS instead of adding new tokens.
from sentencepiece import SentencePieceProcessor
tokenizer_path = self.dir_model / 'tokenizer.model'
if not tokenizer_path.is_file():
logger.error(f'Error: Missing {tokenizer_path}')
sys.exit(1)
# Read the whole vocabulary from the tokenizer.model file
tokenizer = SentencePieceProcessor()
tokenizer.LoadFromFile(str(tokenizer_path))
vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
tokens: list[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
scores: list[float] = [-10000.0] * vocab_size
toktypes: list[int] = [SentencePieceTokenTypes.UNUSED] * vocab_size
for token_id in range(tokenizer.vocab_size()):
piece = tokenizer.IdToPiece(token_id)
text = piece.encode("utf-8")
score = tokenizer.GetScore(token_id)
toktype = SentencePieceTokenTypes.NORMAL
if tokenizer.IsUnknown(token_id):
toktype = SentencePieceTokenTypes.UNKNOWN
elif tokenizer.IsControl(token_id):
toktype = SentencePieceTokenTypes.CONTROL
elif tokenizer.IsUnused(token_id):
toktype = SentencePieceTokenTypes.UNUSED
elif tokenizer.IsByte(token_id):
toktype = SentencePieceTokenTypes.BYTE
tokens[token_id] = text
scores[token_id] = score
toktypes[token_id] = toktype
# Use the added_tokens_decoder field from tokeniser_config.json as the source
# of information about added/redefined tokens and modify them accordingly.
tokenizer_config_file = self.dir_model / 'tokenizer_config.json'
if tokenizer_config_file.is_file():
with open(tokenizer_config_file, "r", encoding="utf-8") as f:
tokenizer_config_json = json.load(f)
if "added_tokens_decoder" in tokenizer_config_json:
added_tokens_decoder = tokenizer_config_json["added_tokens_decoder"]
for token_id, token_json in added_tokens_decoder.items():
token_id = int(token_id)
if token_id >= vocab_size:
logger.debug(f'ignore token {token_id}: id is out of range, max={vocab_size - 1}')
continue
token_content = token_json["content"]
token_type = SentencePieceTokenTypes.USER_DEFINED
token_score = -10000.0
# Map unk_token to UNKNOWN, other special tokens to CONTROL
# Set the score to 0.0 as in the original tokenizer.model
if ("special" in token_json) and token_json["special"]:
if token_content == tokenizer_config_json["unk_token"]:
token_type = SentencePieceTokenTypes.UNKNOWN
else:
token_type = SentencePieceTokenTypes.CONTROL
token_score = 0.0
logger.info(f"Setting added token {token_id} to '{token_content}' (type: {token_type}, score: {token_score:.2f})")
tokens[token_id] = token_content.encode("utf-8")
toktypes[token_id] = token_type
scores[token_id] = token_score
self.gguf_writer.add_tokenizer_model("llama")
self.gguf_writer.add_tokenizer_pre("default")
self.gguf_writer.add_token_list(tokens)
self.gguf_writer.add_token_scores(scores)
self.gguf_writer.add_token_types(toktypes)
special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
special_vocab.add_to_gguf(self.gguf_writer)

    def set_gguf_parameters(self):
        super().set_gguf_parameters()
        hparams = self.hparams
        self.gguf_writer.add_vocab_size(hparams["vocab_size"])
        self.gguf_writer.add_rope_dimension_count(hparams["hidden_size"] // hparams["num_attention_heads"])

    _experts: list[dict[str, Tensor]] | None = None

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
        n_head = self.hparams["num_attention_heads"]
        n_kv_head = self.hparams.get("num_key_value_heads")
        if name.endswith("q_proj.weight"):
            data_torch = LlamaModel.permute(data_torch, n_head, n_head)
        if name.endswith("k_proj.weight"):
            data_torch = LlamaModel.permute(data_torch, n_head, n_kv_head)
        # process the experts separately
        if name.find("block_sparse_moe.experts") != -1:
            n_experts = self.hparams["num_local_experts"]
            assert bid is not None
            if self._experts is None:
                self._experts = [{} for _ in range(self.block_count)]
            self._experts[bid][name] = data_torch
            if len(self._experts[bid]) >= n_experts * 3:
                tensors: list[tuple[str, Tensor]] = []
                # merge the experts into a single 3d tensor
                for wid in ["w1", "w2", "w3"]:
                    datas: list[Tensor] = []
                    for xid in range(n_experts):
                        ename = f"model.layers.{bid}.block_sparse_moe.experts.{xid}.{wid}.weight"
                        datas.append(self._experts[bid][ename])
                        del self._experts[bid][ename]
                    data_torch = torch.stack(datas, dim=0)
                    merged_name = f"layers.{bid}.feed_forward.experts.{wid}.weight"
                    new_name = self.map_tensor_name(merged_name)
                    tensors.append((new_name, data_torch))
                return tensors
            else:
                return []
        return [(self.map_tensor_name(name), data_torch)]

    def prepare_tensors(self):
        super().prepare_tensors()
        if self._experts is not None:
            # flatten `list[dict[str, Tensor]]` into `list[str]`
            experts = [k for d in self._experts for k in d.keys()]
            if len(experts) > 0:
                raise ValueError(f"Unprocessed experts: {experts}")