Commit e930493

JohannesGaessler authored and iThalay committed
Fix FlashAttention debug test, FP32 assert (llama/7684)
1 parent 0f9e522 commit e930493

File tree

1 file changed: 0 additions, 4 deletions

ggml-cuda/fattn-vec-f32.cuh

Lines changed: 0 additions & 4 deletions
```diff
@@ -278,14 +278,10 @@ void ggml_cuda_flash_attn_ext_vec_f32_case_impl(ggml_backend_cuda_context & ctx,
 
 template <int D, ggml_type type_K, ggml_type type_V>
 void ggml_cuda_flash_attn_ext_vec_f32_case(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
-    ggml_tensor * KQV = dst;
     ggml_tensor * Q = dst->src[0];
     ggml_tensor * K = dst->src[1];
     ggml_tensor * V = dst->src[2];
 
-    const int32_t precision = KQV->op_params[2];
-    GGML_ASSERT(precision == GGML_PREC_DEFAULT);
-
     GGML_ASSERT(K->type == type_K);
     GGML_ASSERT(V->type == type_V);
 
```