From c1104127ac5ba1fcaa2f09b454de6fd616834c4b Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Fri, 29 Dec 2023 10:37:07 +0200
Subject: [PATCH] cuda : ggml_mul_mat assert for padded src1

---
 src/ggml-cuda.cu | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/ggml-cuda.cu b/src/ggml-cuda.cu
index 9a9effcf5..84e302784 100644
--- a/src/ggml-cuda.cu
+++ b/src/ggml-cuda.cu
@@ -8529,6 +8529,7 @@ static void ggml_cuda_mul_mat_mat_batched_cublas(const ggml_tensor * src0, const
     if (src1->type != GGML_TYPE_F16) {
         const to_fp16_cuda_t to_fp16_cuda = ggml_get_to_fp16_cuda(src1->type);
         const int64_t ne_src1 = ggml_nelements(src1);
+        GGML_ASSERT(ne_src1 == ggml_nbytes(src1)/ggml_type_size(src1->type));
         src1_f16_alloc.alloc(ne_src1);
         GGML_ASSERT(to_fp16_cuda != nullptr);
         to_fp16_cuda(src1_ddf, src1_f16_alloc.get(), ne_src1, main_stream);