@@ -6874,38 +6874,11 @@ def _map_block_tensor(self, layer: int, rest: str, data_torch: Tensor, name: str
             suffix = parts[-1]
             return [(f'v.blk.{layer}.attn_ln.{suffix}', data_torch)]
 
-        # fused qkv
-        if rest == 'attn.qkv.weight':
-            w = data_torch
-            wdim = w.shape[0]
-            if wdim % 3 != 0:
-                logger.warning('mmproj(jinaclip): unexpected qkv weight shape %s for %s', tuple(w.shape), name)
-            d = wdim // 3
-            q, k, v = w[0:d, :], w[d:2 * d, :], w[2 * d:, :]
-            return [
-                (f'v.blk.{layer}.attn_q.weight', q),
-                (f'v.blk.{layer}.attn_k.weight', k),
-                (f'v.blk.{layer}.attn_v.weight', v),
-            ]
-        if rest == 'attn.qkv.bias':
-            b = data_torch
-            bdim = b.shape[0]
-            if bdim % 3 != 0:
-                logger.warning('mmproj(jinaclip): unexpected qkv bias shape %s for %s', tuple(b.shape), name)
-            d = bdim // 3
-            qb, kb, vb = b[0:d], b[d:2 * d], b[2 * d:]
-            return [
-                (f'v.blk.{layer}.attn_q.bias', qb),
-                (f'v.blk.{layer}.attn_k.bias', kb),
-                (f'v.blk.{layer}.attn_v.bias', vb),
-            ]
-        # separate q/v bias (some checkpoints)
         if rest == 'attn.q_bias':
            return [(f'v.blk.{layer}.attn_q.bias', data_torch)]
         if rest == 'attn.v_bias':
            return [(f'v.blk.{layer}.attn_v.bias', data_torch)]
 
-        # separate projections
         if rest.startswith('attn.q_proj.'):
             suffix = parts[-1]
             return [(f'v.blk.{layer}.attn_q.{suffix}', data_torch)]
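
For context, the deleted fused-qkv path sliced the first dimension of a combined QKV tensor into three equal parts before emitting separate q/k/v entries. A minimal standalone sketch of that splitting step, with illustrative names (`fused_w`, `d`) that are not taken from the converter:

import torch

# Illustrative fused QKV weight: Q, K and V stacked along dim 0, d rows each.
d = 4
fused_w = torch.randn(3 * d, 8)

# The removed code sliced manually: w[0:d, :], w[d:2 * d, :], w[2 * d:, :].
# torch.chunk along dim 0 yields the same three equal parts.
q, k, v = torch.chunk(fused_w, 3, dim=0)
assert torch.equal(q, fused_w[0:d]) and torch.equal(v, fused_w[2 * d:])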