Skip to content

Commit 115c0c6

Browse files
authored
[X86][test] Remove useless pattern for VDPBF16PSZmb and add a test for broadcast folding (llvm#80629)
llvm-issue: llvm#68810
1 parent 4926f12 commit 115c0c6

File tree

2 files changed: +20 -2 lines changed

llvm/lib/Target/X86/X86InstrAVX512.td

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12716,13 +12716,13 @@ multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
12716 | 12716 |   (src_v.LdFrag addr:$src3)))>, EVEX, VVVV,
12717 | 12717 |   Sched<[sched.Folded, sched.ReadAfterFold]>;
12718 | 12718 |
      | 12719 | + let mayLoad = 1, hasSideEffects = 0 in
12719 | 12720 |   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12720 | 12721 |   (ins src_v.RC:$src2, f32mem:$src3),
12721 | 12722 |   OpcodeStr,
12722 | 12723 |   !strconcat("${src3}", _.BroadcastStr,", $src2"),
12723 | 12724 |   !strconcat("$src2, ${src3}", _.BroadcastStr),
12724 |       | - (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
12725 |       | - (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
      | 12725 | + (null_frag)>,
12726 | 12726 |   EVEX_B, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
12727 | 12727 |
12728 | 12728 |   }
Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,18 @@
      |  1 | + ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
      |  2 | + ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx512bf16 < %s | FileCheck %s
      |  3 | +
      |  4 | + define <16 x float> @mm512_dpbf16_ps_broadcast_rhs(<16 x float> noundef %acc, <32 x bfloat> noundef %lhs, ptr nocapture noundef readonly %rhs) {
      |  5 | + ; CHECK-LABEL: mm512_dpbf16_ps_broadcast_rhs:
      |  6 | + ; CHECK: # %bb.0: # %entry
      |  7 | + ; CHECK-NEXT: vdpbf16ps (%rdi){1to16}, %zmm1, %zmm0
      |  8 | + ; CHECK-NEXT: retq
      |  9 | + entry:
      | 10 | + %0 = load float, ptr %rhs, align 4
      | 11 | + %vecinit.i = insertelement <16 x float> poison, float %0, i64 0
      | 12 | + %vecinit15.i = shufflevector <16 x float> %vecinit.i, <16 x float> poison, <16 x i32> zeroinitializer
      | 13 | + %1 = bitcast <16 x float> %vecinit15.i to <32 x bfloat>
      | 14 | + %2 = tail call <16 x float> @llvm.x86.avx512bf16.dpbf16ps.512(<16 x float> %acc, <32 x bfloat> %lhs, <32 x bfloat> %1)
      | 15 | + ret <16 x float> %2
      | 16 | + }
      | 17 | +
      | 18 | + declare <16 x float> @llvm.x86.avx512bf16.dpbf16ps.512(<16 x float>, <32 x bfloat>, <32 x bfloat>)

0 commit comments

Comments
 (0)