Skip to content

Commit 299f5d7

Browse files
authored
CUDA: properly handle nb00=nb02 case for cpy (#17081)
1 parent ac76d36 commit 299f5d7

File tree

2 files changed

+2
-3
lines changed

2 files changed

+2
-3
lines changed

ggml/src/ggml-cuda/cpy.cu

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -198,16 +198,14 @@ static void ggml_cpy_flt_cuda(
198198
if (transposed) {
199199
GGML_ASSERT(ne == ne00*ne01*ne02); // ne[3] is 1 assumed
200200
int ne00n, ne01n, ne02n;
201-
if (nb00 < nb02) {
201+
if (nb00 <= nb02) { // most likely safe to handle nb00 = nb02 case here
202202
ne00n = ne00;
203203
ne01n = ne01;
204204
ne02n = ne02;
205205
} else if (nb00 > nb02) {
206206
ne00n = ne00;
207207
ne01n = ne01*ne02;
208208
ne02n = 1;
209-
} else {
210-
GGML_ASSERT(false);
211209
}
212210

213211
dim3 dimGrid( (ne01n + CUDA_CPY_TILE_DIM_2D - 1) / CUDA_CPY_TILE_DIM_2D,

tests/test-backend-ops.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6648,6 +6648,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
66486648
test_cases.emplace_back(new test_cpy(GGML_TYPE_F16, GGML_TYPE_F16, {256, 4, 1, 1}, {0, 0, 0, 0}, {0, 0, 0, 0}, true));
66496649
test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32, {256, 4, 1, 1}, {0, 0, 0, 0}, {0, 0, 0, 0}, true));
66506650
test_cases.emplace_back(new test_cpy(GGML_TYPE_BF16, GGML_TYPE_BF16, {256, 4, 1, 1}, {0, 0, 0, 0}, {0, 0, 0, 0}, true));
6651+
test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32, {256, 1, 4, 1}, {1, 2, 0, 3}, {0, 0, 0, 0}));
66516652

66526653
test_cases.emplace_back(new test_cont());
66536654
test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 1, 1 ,1}));

0 commit comments

Comments
 (0)