
Commit 500683a

Author: liyang
Parent: 60b6b63

remove unused fused QKV mapping

File tree

5 files changed: +25 -50 lines


convert_hf_to_gguf.py

Lines changed: 0 additions & 27 deletions

@@ -6875,38 +6875,11 @@ def _map_block_tensor(self, layer: int, rest: str, data_torch: Tensor, name: str
             suffix = parts[-1]
             return [(f'v.blk.{layer}.attn_ln.{suffix}', data_torch)]
 
-        # fused qkv
-        if rest == 'attn.qkv.weight':
-            w = data_torch
-            wdim = w.shape[0]
-            if wdim % 3 != 0:
-                logger.warning('mmproj(jinaclip): unexpected qkv weight shape %s for %s', tuple(w.shape), name)
-            d = wdim // 3
-            q, k, v = w[0:d, :], w[d:2 * d, :], w[2 * d:, :]
-            return [
-                (f'v.blk.{layer}.attn_q.weight', q),
-                (f'v.blk.{layer}.attn_k.weight', k),
-                (f'v.blk.{layer}.attn_v.weight', v),
-            ]
-        if rest == 'attn.qkv.bias':
-            b = data_torch
-            bdim = b.shape[0]
-            if bdim % 3 != 0:
-                logger.warning('mmproj(jinaclip): unexpected qkv bias shape %s for %s', tuple(b.shape), name)
-            d = bdim // 3
-            qb, kb, vb = b[0:d], b[d:2 * d], b[2 * d:]
-            return [
-                (f'v.blk.{layer}.attn_q.bias', qb),
-                (f'v.blk.{layer}.attn_k.bias', kb),
-                (f'v.blk.{layer}.attn_v.bias', vb),
-            ]
-        # separate q/v bias (some checkpoints)
         if rest == 'attn.q_bias':
             return [(f'v.blk.{layer}.attn_q.bias', data_torch)]
         if rest == 'attn.v_bias':
             return [(f'v.blk.{layer}.attn_v.bias', data_torch)]
 
-        # separate projections
         if rest.startswith('attn.q_proj.'):
             suffix = parts[-1]
             return [(f'v.blk.{layer}.attn_q.{suffix}', data_torch)]
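For reference, the deleted mapping can be reproduced outside the conversion script: it splits a fused QKV tensor into three equal parts along the first dimension and pairs each part with the per-layer GGUF tensor names seen in the diff above. Below is a minimal standalone sketch assuming PyTorch tensors; split_fused_qkv is a hypothetical helper name, not part of convert_hf_to_gguf.py.

import torch
from torch import Tensor

def split_fused_qkv(layer: int, data: Tensor, kind: str = 'weight') -> list[tuple[str, Tensor]]:
    # split a fused QKV tensor into equal Q/K/V parts along dim 0 (hypothetical helper)
    dim = data.shape[0]
    if dim % 3 != 0:
        raise ValueError(f'unexpected fused qkv shape {tuple(data.shape)}')
    d = dim // 3
    q, k, v = data[0:d], data[d:2 * d], data[2 * d:]
    return [
        (f'v.blk.{layer}.attn_q.{kind}', q),
        (f'v.blk.{layer}.attn_k.{kind}', k),
        (f'v.blk.{layer}.attn_v.{kind}', v),
    ]

# example: a fused weight of shape (3 * 64, 64) yields three (64, 64) tensors
mapped = split_fused_qkv(0, torch.randn(3 * 64, 64))
print([name for name, _ in mapped])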

tools/mtmd/clip.cpp

Lines changed: 17 additions & 17 deletions

@@ -2372,15 +2372,15 @@ struct clip_graph {
     }
 
     ggml_tensor * build_ffn(
-        ggml_tensor * cur,
-        ggml_tensor * up,
-        ggml_tensor * up_b,
-        ggml_tensor * gate,
-        ggml_tensor * gate_b,
-        ggml_tensor * down,
-        ggml_tensor * down_b,
-        ffn_op_type type_op,
-        int il) const {
+            ggml_tensor * cur,
+            ggml_tensor * up,
+            ggml_tensor * up_b,
+            ggml_tensor * gate,
+            ggml_tensor * gate_b,
+            ggml_tensor * down,
+            ggml_tensor * down_b,
+            ffn_op_type type_op,
+            int il) const {
 
         ggml_tensor * tmp = up ? ggml_mul_mat(ctx0, up, cur) : cur;
         cb(tmp, "ffn_up", il);

@@ -2467,14 +2467,14 @@ struct clip_graph {
     }
 
     ggml_tensor * build_attn(
-        ggml_tensor * wo,
-        ggml_tensor * wo_b,
-        ggml_tensor * q_cur,
-        ggml_tensor * k_cur,
-        ggml_tensor * v_cur,
-        ggml_tensor * kq_mask,
-        float kq_scale,
-        int il) const {
+            ggml_tensor * wo,
+            ggml_tensor * wo_b,
+            ggml_tensor * q_cur,
+            ggml_tensor * k_cur,
+            ggml_tensor * v_cur,
+            ggml_tensor * kq_mask,
+            float kq_scale,
+            int il) const {
         // these nodes are added to the graph together so that they are not reordered
         // by doing so, the number of splits in the graph is reduced
         ggml_build_forward_expand(gf, q_cur);

tools/mtmd/mtmd-cli.cpp

Lines changed: 0 additions & 1 deletion

@@ -175,7 +175,6 @@ static int run_mmproj_only(common_params & params) {
     if (params.mmproj.path.empty() || params.image.empty()) return -1;
     mtmd_context_params ctx_params = mtmd_context_params_default();
     ctx_params.use_gpu = params.mmproj_use_gpu;
-    ctx_params.verbosity = (params.verbosity > 0) ? GGML_LOG_LEVEL_DEBUG : GGML_LOG_LEVEL_INFO;
     mtmd_mmproj_context * mctx = mtmd_mmproj_init(params.mmproj.path.c_str(), ctx_params);
     if (!mctx) {
         LOG_ERR("[ERROR] Failed to load vision mmproj: %s\n", params.mmproj.path.c_str());

tools/mtmd/mtmd.cpp

Lines changed: 7 additions & 4 deletions

@@ -406,10 +406,13 @@ struct mtmd_mmproj_context {
 };
 
 mtmd_mmproj_context * mtmd_mmproj_init(const char * mmproj_fname,
-        const struct mtmd_context_params ctx_params) {
-    clip_context_params clip_params;
-    clip_params.use_gpu = ctx_params.use_gpu;
-    clip_params.verbosity = ctx_params.verbosity;
+                                       const struct mtmd_context_params ctx_params) {
+    clip_context_params clip_params {
+        /* use_gpu          */ ctx_params.use_gpu,
+        /* flash_attn_type  */ CLIP_FLASH_ATTN_TYPE_AUTO,
+        /* image_min_tokens */ ctx_params.image_min_tokens,
+        /* image_max_tokens */ ctx_params.image_max_tokens,
+    };
     auto res = clip_init(mmproj_fname, clip_params);
     if (!res.ctx_v) {
         return nullptr;

tools/mtmd/mtmd.h

Lines changed: 1 addition & 1 deletion

@@ -233,7 +233,7 @@ MTMD_API int mtmd_mmproj_get_image_size (struct mtmd_mmproj_context * ctx);
 MTMD_API int  mtmd_mmproj_get_patch_size (struct mtmd_mmproj_context * ctx);
 MTMD_API int  mtmd_mmproj_get_hidden_size(struct mtmd_mmproj_context * ctx);
 MTMD_API bool mtmd_mmproj_is_jinaclip    (struct mtmd_mmproj_context * ctx);
-// generic support check for projector-only encode path
+// generic support check for projector-only encode
 MTMD_API bool mtmd_mmproj_is_supported   (struct mtmd_mmproj_context * ctx);
 
 // encode a bitmap (RGB) to projector embeddings
