@@ -1531,7 +1531,9 @@ class MmprojModel(ModelBase):
     preprocessor_config: dict[str, Any]
     global_config: dict[str, Any]

-    n_block_keys = ["n_layers", "num_hidden_layers", "n_layer", "num_layers", "depth"]
+    # Prefer explicit "layers" (e.g. JinaCLIP),
+    # keep legacy keys for other models.
+    n_block_keys = ["layers", "n_layers", "num_hidden_layers", "n_layer", "num_layers", "depth"]

     has_vision_encoder: bool = True  # by default
     has_audio_encoder: bool = False
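Worth noting: the converter takes the first of these keys that is present in the vision hparams, so putting "layers" first makes it win for JinaCLIP-style configs while leaving models that use the legacy keys untouched. A minimal sketch of that first-match lookup, with made-up configs (the real code goes through the model's hparam helper, not this standalone function):

```python
from typing import Any

n_block_keys = ["layers", "n_layers", "num_hidden_layers", "n_layer", "num_layers", "depth"]

def pick_block_count(hparams: dict[str, Any]) -> int | None:
    # first matching key wins, mirroring the ordering above
    for key in n_block_keys:
        if key in hparams:
            return hparams[key]
    return None

print(pick_block_count({"layers": 24, "width": 1024}))  # JinaCLIP-style config -> 24
print(pick_block_count({"num_hidden_layers": 32}))      # legacy-style config   -> 32
```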
@@ -6805,6 +6807,11 @@ def __init__(self, *args, **kwargs):
         with open(config_path, encoding="utf-8") as f:
             self.vision_config = json.load(f)

+    def get_vision_config(self) -> dict[str, Any] | None:
+        # For JinaCLIPVisionModel, the top-level AutoConfig dict is already
+        # the vision-only configuration.
+        return self.global_config
+
     def set_vocab(self):
         # Vision encoder doesn't need vocabulary
         pass
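As a rough illustration of why the override is needed: the base class default (approximately, and subject to upstream changes) pulls a nested `vision_config` sub-dict out of the top-level config, while JinaCLIP's vision AutoConfig is already flat, so the whole top-level dict is returned instead. A hedged sketch with invented config contents:

```python
# invented configs for illustration only
nested_config = {"model_type": "clip", "vision_config": {"layers": 24, "width": 1024}}
flat_jina_config = {"layers": 24, "width": 1024, "patch_size": 14}

# default-style lookup: extract the nested sub-dict
print(nested_config.get("vision_config"))  # {'layers': 24, 'width': 1024}

# JinaCLIP override: the top-level dict already is the vision config
print(flat_jina_config)                    # {'layers': 24, 'width': 1024, 'patch_size': 14}
```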
@@ -6862,73 +6869,10 @@ def set_gguf_parameters(self):
     def _strip_vm_prefix(self, name: str) -> str:
         return name[len('vision_model.'):] if name.startswith('vision_model.') else name

-    def _map_block_tensor(self, layer: int, rest: str, data_torch: Tensor, name: str) -> list[tuple[str, Tensor]] | None:
-        parts = rest.split('.')
-        # layer norms
-        if rest.startswith('norm1.'):
-            suffix = parts[-1]
-            return [(f'v.blk.{layer}.ln1.{suffix}', data_torch)]
-        if rest.startswith('norm2.'):
-            suffix = parts[-1]
-            return [(f'v.blk.{layer}.ln2.{suffix}', data_torch)]
-        if rest.startswith('attn.inner_attn_ln.'):
-            suffix = parts[-1]
-            return [(f'v.blk.{layer}.attn_ln.{suffix}', data_torch)]
-
-        if rest == 'attn.q_bias':
-            return [(f'v.blk.{layer}.attn_q.bias', data_torch)]
-        if rest == 'attn.v_bias':
-            return [(f'v.blk.{layer}.attn_v.bias', data_torch)]
-
-        if rest.startswith('attn.q_proj.'):
-            suffix = parts[-1]
-            return [(f'v.blk.{layer}.attn_q.{suffix}', data_torch)]
-        if rest.startswith('attn.k_proj.'):
-            suffix = parts[-1]
-            return [(f'v.blk.{layer}.attn_k.{suffix}', data_torch)]
-        if rest.startswith('attn.v_proj.'):
-            suffix = parts[-1]
-            return [(f'v.blk.{layer}.attn_v.{suffix}', data_torch)]
-        if rest.startswith('attn.proj.'):
-            suffix = parts[-1]
-            return [(f'v.blk.{layer}.attn_out.{suffix}', data_torch)]
-
-        # MLP
-        if rest.startswith('mlp.w1.'):
-            suffix = parts[-1]
-            return [(f'v.blk.{layer}.ffn_gate.{suffix}', data_torch)]
-        if rest.startswith('mlp.w2.'):
-            suffix = parts[-1]
-            return [(f'v.blk.{layer}.ffn_up.{suffix}', data_torch)]
-        if rest.startswith('mlp.w3.'):
-            suffix = parts[-1]
-            return [(f'v.blk.{layer}.ffn_down.{suffix}', data_torch)]
-        if rest.startswith('mlp.ffn_ln.'):
-            suffix = parts[-1]
-            return [(f'v.blk.{layer}.ffn_norm.{suffix}', data_torch)]
-        if rest.startswith('mlp.fc1.'):
-            suffix = parts[-1]
-            return [(f'v.blk.{layer}.ffn_up.{suffix}', data_torch)]
-        if rest.startswith('mlp.fc2.'):
-            suffix = parts[-1]
-            return [(f'v.blk.{layer}.ffn_down.{suffix}', data_torch)]
-        return None
-
     def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = (".weight", ".bias")) -> str:
-        """Prefer base table-driven mapping; keep Jina-specific targets if already mapped; fallback to legacy mapper."""
-        # Already a GGUF target name (e.g., "v.*" or "mm.*"): return as-is
         if name.startswith('v.') or name.startswith('mm.'):
             return name
-        # Try the base mapping first
-        try:
-            return super().map_tensor_name(name, try_suffixes=try_suffixes)
-        except Exception:
-            # Fallback to legacy Jina-specific mapper for any remaining edge keys
-            if hasattr(self, "_map_jinaclip_tensor_name"):
-                mapped = self._map_jinaclip_tensor_name(name)  # type: ignore[attr-defined]
-                if mapped:
-                    return mapped
-            return name
+        return super().map_tensor_name(name, try_suffixes=try_suffixes)

     def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
         yielded_any = False
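After this change `map_tensor_name` has exactly two paths: names that already look like GGUF targets (`v.*` / `mm.*`) pass through unchanged, and everything else goes to the base class's table-driven mapper, which raises on unknown names instead of silently keeping them. A small sketch of that control flow, using a stand-in dictionary in place of the real mapping table:

```python
# stand-in for the base class's tensor-name table; the real one lives in gguf-py
KNOWN = {"vision_model.patch_embed.proj.weight": "v.patch_embd.weight"}

def base_map_tensor_name(name: str) -> str:
    if name not in KNOWN:
        raise ValueError(f"Can not map tensor {name!r}")
    return KNOWN[name]

def map_tensor_name(name: str) -> str:
    if name.startswith('v.') or name.startswith('mm.'):
        return name                        # already a GGUF target name
    return base_map_tensor_name(name)      # otherwise use the shared table

print(map_tensor_name("v.blk.0.attn_q.weight"))                  # pass-through
print(map_tensor_name("vision_model.patch_embed.proj.weight"))   # table lookup
```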
@@ -6967,39 +6911,10 @@ def _should_be_f32(self, gguf_name: str) -> bool:
         return any(p in gguf_name for p in patterns)

     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
-        del bid  # unused
-
-        src = name
-        if src.startswith('v.') or src.startswith('mm.'):
-            return [(src, data_torch)]
-
-        # Drop 'vision_model.' prefix if present
-        src_no_vm = self._strip_vm_prefix(src)
-
-        # Top-level direct mappings — use gguf constants directly for canonical names
-        if src_no_vm == 'cls_token':
-            base = gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_ENC_EMBD_CLS]
-            return [(base, data_torch)]
-        if src_no_vm.startswith('patch_embed.proj.'):
-            suffix = src_no_vm.split('.')[-1]
-            base = gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_ENC_EMBD_PATCH]
-            return [(f'{base}.{suffix}', data_torch)]
-        if src_no_vm == 'pos_embed':
+        # keep only pos_embed special case (no .weight suffix); all other tensors use table-driven mapping
+        if name == 'pos_embed':
             pos_name = gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_ENC_EMBD_POS] + '.weight'
             return [(pos_name, data_torch)]
-        if src_no_vm.startswith('norm.'):
-            suffix = src_no_vm.split('.')[-1]
-            base = gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_POST_NORM]
-            return [(f'{base}.{suffix}', data_torch)]
-
-        if src_no_vm.startswith('blocks.'):
-            parts = src_no_vm.split('.')
-            if len(parts) >= 3 and parts[1].isdigit():
-                layer = int(parts[1])
-                rest = '.'.join(parts[2:])
-                mapped = self._map_block_tensor(layer, rest, data_torch, name)
-                if mapped is not None:
-                    return mapped

         try:
             return [(self.map_tensor_name(name), data_torch)]
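The surviving special case exists because the checkpoint stores the position embedding as a bare `pos_embed` tensor with no `.weight` suffix, so the suffix-aware table lookup cannot resolve it; the code builds the GGUF name from the constant and appends `.weight` itself. A quick check of the resulting name (assuming `gguf-py` is installed and `V_ENC_EMBD_POS` maps to `v.position_embd`, as in current versions):

```python
import gguf

pos_name = gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_ENC_EMBD_POS] + '.weight'
print(pos_name)  # expected: v.position_embd.weight
```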