
Commit 500683a

Author: liyang
Parent: 60b6b63

remove unused fused QKV mapping

File tree

5 files changed: +25 -50 lines


convert_hf_to_gguf.py

Lines changed: 0 additions & 27 deletions

@@ -6875,38 +6875,11 @@ def _map_block_tensor(self, layer: int, rest: str, data_torch: Tensor, name: str
             suffix = parts[-1]
             return [(f'v.blk.{layer}.attn_ln.{suffix}', data_torch)]
 
-        # fused qkv
-        if rest == 'attn.qkv.weight':
-            w = data_torch
-            wdim = w.shape[0]
-            if wdim % 3 != 0:
-                logger.warning('mmproj(jinaclip): unexpected qkv weight shape %s for %s', tuple(w.shape), name)
-            d = wdim // 3
-            q, k, v = w[0:d, :], w[d:2 * d, :], w[2 * d:, :]
-            return [
-                (f'v.blk.{layer}.attn_q.weight', q),
-                (f'v.blk.{layer}.attn_k.weight', k),
-                (f'v.blk.{layer}.attn_v.weight', v),
-            ]
-        if rest == 'attn.qkv.bias':
-            b = data_torch
-            bdim = b.shape[0]
-            if bdim % 3 != 0:
-                logger.warning('mmproj(jinaclip): unexpected qkv bias shape %s for %s', tuple(b.shape), name)
-            d = bdim // 3
-            qb, kb, vb = b[0:d], b[d:2 * d], b[2 * d:]
-            return [
-                (f'v.blk.{layer}.attn_q.bias', qb),
-                (f'v.blk.{layer}.attn_k.bias', kb),
-                (f'v.blk.{layer}.attn_v.bias', vb),
-            ]
-        # separate q/v bias (some checkpoints)
         if rest == 'attn.q_bias':
             return [(f'v.blk.{layer}.attn_q.bias', data_torch)]
         if rest == 'attn.v_bias':
             return [(f'v.blk.{layer}.attn_v.bias', data_torch)]
 
-        # separate projections
         if rest.startswith('attn.q_proj.'):
             suffix = parts[-1]
             return [(f'v.blk.{layer}.attn_q.{suffix}', data_torch)]
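For reference, the deleted mapping can be reproduced outside the conversion script: it splits a fused QKV tensor into three equal parts along the first dimension and pairs each part with the per-layer GGUF tensor names seen in the diff above. Below is a minimal standalone sketch assuming PyTorch tensors; split_fused_qkv is a hypothetical helper name, not part of convert_hf_to_gguf.py.

import torch
from torch import Tensor

def split_fused_qkv(layer: int, data: Tensor, kind: str = 'weight') -> list[tuple[str, Tensor]]:
    # split a fused QKV tensor into equal Q/K/V parts along dim 0 (hypothetical helper)
    dim = data.shape[0]
    if dim % 3 != 0:
        raise ValueError(f'unexpected fused qkv shape {tuple(data.shape)}')
    d = dim // 3
    q, k, v = data[0:d], data[d:2 * d], data[2 * d:]
    return [
        (f'v.blk.{layer}.attn_q.{kind}', q),
        (f'v.blk.{layer}.attn_k.{kind}', k),
        (f'v.blk.{layer}.attn_v.{kind}', v),
    ]

# example: a fused weight of shape (3 * 64, 64) yields three (64, 64) tensors
mapped = split_fused_qkv(0, torch.randn(3 * 64, 64))
print([name for name, _ in mapped])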

tools/mtmd/clip.cpp

Lines changed: 17 additions & 17 deletions

@@ -2372,15 +2372,15 @@ struct clip_graph {
     }
 
     ggml_tensor * build_ffn(
-        ggml_tensor * cur,
-        ggml_tensor * up,
-        ggml_tensor * up_b,
-        ggml_tensor * gate,
-        ggml_tensor * gate_b,
-        ggml_tensor * down,
-        ggml_tensor * down_b,
-        ffn_op_type type_op,
-        int il) const {
+            ggml_tensor * cur,
+            ggml_tensor * up,
+            ggml_tensor * up_b,
+            ggml_tensor * gate,
+            ggml_tensor * gate_b,
+            ggml_tensor * down,
+            ggml_tensor * down_b,
+            ffn_op_type type_op,
+            int il) const {
 
         ggml_tensor * tmp = up ? ggml_mul_mat(ctx0, up, cur) : cur;
         cb(tmp, "ffn_up", il);

@@ -2467,14 +2467,14 @@ struct clip_graph {
     }
 
     ggml_tensor * build_attn(
-        ggml_tensor * wo,
-        ggml_tensor * wo_b,
-        ggml_tensor * q_cur,
-        ggml_tensor * k_cur,
-        ggml_tensor * v_cur,
-        ggml_tensor * kq_mask,
-        float kq_scale,
-        int il) const {
+            ggml_tensor * wo,
+            ggml_tensor * wo_b,
+            ggml_tensor * q_cur,
+            ggml_tensor * k_cur,
+            ggml_tensor * v_cur,
+            ggml_tensor * kq_mask,
+            float kq_scale,
+            int il) const {
         // these nodes are added to the graph together so that they are not reordered
         // by doing so, the number of splits in the graph is reduced
         ggml_build_forward_expand(gf, q_cur);

tools/mtmd/mtmd-cli.cpp

Lines changed: 0 additions & 1 deletion

@@ -175,7 +175,6 @@ static int run_mmproj_only(common_params & params) {
     if (params.mmproj.path.empty() || params.image.empty()) return -1;
     mtmd_context_params ctx_params = mtmd_context_params_default();
     ctx_params.use_gpu = params.mmproj_use_gpu;
-    ctx_params.verbosity = (params.verbosity > 0) ? GGML_LOG_LEVEL_DEBUG : GGML_LOG_LEVEL_INFO;
     mtmd_mmproj_context * mctx = mtmd_mmproj_init(params.mmproj.path.c_str(), ctx_params);
     if (!mctx) {
         LOG_ERR("[ERROR] Failed to load vision mmproj: %s\n", params.mmproj.path.c_str());

tools/mtmd/mtmd.cpp

Lines changed: 7 additions & 4 deletions

@@ -406,10 +406,13 @@ struct mtmd_mmproj_context {
 };
 
 mtmd_mmproj_context * mtmd_mmproj_init(const char * mmproj_fname,
-        const struct mtmd_context_params ctx_params) {
-    clip_context_params clip_params;
-    clip_params.use_gpu = ctx_params.use_gpu;
-    clip_params.verbosity = ctx_params.verbosity;
+                                       const struct mtmd_context_params ctx_params) {
+    clip_context_params clip_params {
+        /* use_gpu          */ ctx_params.use_gpu,
+        /* flash_attn_type  */ CLIP_FLASH_ATTN_TYPE_AUTO,
+        /* image_min_tokens */ ctx_params.image_min_tokens,
+        /* image_max_tokens */ ctx_params.image_max_tokens,
+    };
     auto res = clip_init(mmproj_fname, clip_params);
     if (!res.ctx_v) {
         return nullptr;

tools/mtmd/mtmd.h

Lines changed: 1 addition & 1 deletion

@@ -233,7 +233,7 @@ MTMD_API int mtmd_mmproj_get_image_size (struct mtmd_mmproj_context * ctx);
 MTMD_API int  mtmd_mmproj_get_patch_size (struct mtmd_mmproj_context * ctx);
 MTMD_API int  mtmd_mmproj_get_hidden_size(struct mtmd_mmproj_context * ctx);
 MTMD_API bool mtmd_mmproj_is_jinaclip    (struct mtmd_mmproj_context * ctx);
-// generic support check for projector-only encode path
+// generic support check for projector-only encode
 MTMD_API bool mtmd_mmproj_is_supported   (struct mtmd_mmproj_context * ctx);
 
 // encode a bitmap (RGB) to projector embeddings
