Skip to content

Commit 553c08b

Browse files
ikawrakow and Iwan Kawrakow authored
Better CPU FA performance for DeepSeek-Lite (#410)
* Better CPU FA performance for DeepSeek-Lite
* It must be like this

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
1 parent 4ba6bbb commit 553c08b

File tree

1 file changed

+7
-1
lines changed

1 file changed

+7
-1
lines changed

ggml/src/iqk/iqk_mul_mat.cpp

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17242,7 +17242,7 @@ struct FlashAttn {
1724217242
q_size = GGML_PAD(q_size, 64);
1724317243
if (q_size > kMaxOnStackSize) {
1724417244
auto qptr = get_q_storage(q_size);
17245-
if (nq1 >= 8) {
17245+
if (false && nq1 >= 8) {
1724617246
if constexpr (std::is_same_v<KHelper, HelperQ80<Dk, k_step>>) {
1724717247
#if FA_TIMING
1724817248
auto t1 = Perf::cur_time();
@@ -17929,6 +17929,12 @@ inline void iqk_deepseek_helper(KHelper& kh, VHelper& vh,
1792917929
if (M && S) { M += n; S += n; }
1793017930
return false;
1793117931
};
17932+
if (nq1 >= 16) {
17933+
int n_step = nq1/16;
17934+
FlashAttn<576, 512, 16, step_k> fa(scale, softcap);
17935+
fa.compute(kh, vh, 16*n_step, nk1, stride_q, stride_m, stride_qkv, q, mask, qkv, M, S);
17936+
if (update(16*n_step)) return;
17937+
}
1793217938
if (nq1 >= 8) {
1793317939
int n_step = nq1/8;
1793417940
FlashAttn<576, 512, 8, step_k> fa(scale, softcap);

0 commit comments

Comments
 (0)