
Commit 3ac6753

llama-graph : use ggml_repeat_4d (#13998)
1 parent 0b4be4c commit 3ac6753

File tree

1 file changed (+2, -3)

src/llama-graph.cpp

Lines changed: 2 additions & 3 deletions
@@ -769,9 +769,8 @@ ggml_tensor * llm_graph_context::build_moe_ffn(
     cur = ggml_reshape_3d(ctx0, cur, n_embd, 1, n_tokens);
 
     if (weight_before_ffn) {
-        // TODO: this is a workaround as we don't yet have a repeat op that takes custom dim (ggml_repeat_4d)
-        ggml_tensor * repeated = ggml_new_tensor_3d(ctx0, cur->type, n_embd, n_expert_used, n_tokens);
-        repeated = ggml_repeat(ctx0, cur, repeated); // [n_embd, n_expert_used, n_tokens]
+        // repeat cur to [n_embd, n_expert_used, n_tokens]
+        ggml_tensor * repeated = ggml_repeat_4d(ctx0, cur, n_embd, n_expert_used, n_tokens, 1);
         cur = ggml_mul(ctx0, repeated, weights);
         cb(cur, "ffn_moe_weighted", il);
     }
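
For context, a minimal standalone sketch (not part of the commit) comparing the old and new broadcast patterns, assuming a ggml build that provides ggml_repeat_4d. The tensor dimensions below are arbitrary placeholders, not the model's real sizes:

// Sketch: broadcast a [n_embd, 1, n_tokens] tensor to [n_embd, n_expert_used, n_tokens].
#include "ggml.h"

int main() {
    const int64_t n_embd = 8, n_expert_used = 2, n_tokens = 4;

    ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ nullptr,
        /*.no_alloc   =*/ false,
    };
    ggml_context * ctx = ggml_init(params);

    // cur plays the role of the reshaped tensor in build_moe_ffn: [n_embd, 1, n_tokens].
    ggml_tensor * cur = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, n_embd, 1, n_tokens);

    // Old pattern: allocate a throwaway tensor just to describe the target shape,
    // then broadcast cur into that shape with ggml_repeat.
    ggml_tensor * target  = ggml_new_tensor_3d(ctx, cur->type, n_embd, n_expert_used, n_tokens);
    ggml_tensor * rep_old = ggml_repeat(ctx, cur, target);

    // New pattern: ggml_repeat_4d takes the target dimensions directly,
    // so no intermediate tensor allocation is needed.
    ggml_tensor * rep_new = ggml_repeat_4d(ctx, cur, n_embd, n_expert_used, n_tokens, 1);

    // Both paths produce a tensor of the same shape.
    GGML_ASSERT(ggml_are_same_shape(rep_old, rep_new));

    ggml_free(ctx);
    return 0;
}

The practical gain is the one the removed TODO asked for: the broadcast no longer requires constructing a dummy tensor solely to carry the target shape.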
