Skip to content

Commit 632ad09

Browse files
committed
update tests
1 parent 04acab4 commit 632ad09

9 files changed

+60
-60
lines changed

llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -63,16 +63,16 @@ define amdgpu_kernel void @writelane(ptr addrspace(1) %out) #0 {
6363
ret void
6464
}
6565

66-
; CHECK: DIVERGENT: %tmp0 = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v16f16.v8f32(<16 x half> %A, <16 x half> %B, <8 x float> %C)
66+
; CHECK: DIVERGENT: %tmp0 = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v16f16(<16 x half> %A, <16 x half> %B, <8 x float> %C)
6767
define amdgpu_kernel void @wmma_f32_16x16x16_f16(<16 x half> %A, <16 x half> %B, <8 x float> %C, ptr addrspace(1) %out) {
68-
%tmp0 = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v16f16.v8f32(<16 x half> %A, <16 x half> %B, <8 x float> %C)
68+
%tmp0 = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v16f16(<16 x half> %A, <16 x half> %B, <8 x float> %C)
6969
store <8 x float> %tmp0, ptr addrspace(1) %out, align 32
7070
ret void
7171
}
7272

73-
; CHECK: DIVERGENT: %tmp0 = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v16i16.v8f32(<16 x i16> %A, <16 x i16> %B, <8 x float> %C)
73+
; CHECK: DIVERGENT: %tmp0 = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v16i16(<16 x i16> %A, <16 x i16> %B, <8 x float> %C)
7474
define amdgpu_kernel void @wmma_f32_16x16x16_ibf16(<16 x i16> %A, <16 x i16> %B, <8 x float> %C, ptr addrspace(1) %out) {
75-
%tmp0 = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v16i16.v8f32(<16 x i16> %A, <16 x i16> %B, <8 x float> %C)
75+
%tmp0 = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v16i16(<16 x i16> %A, <16 x i16> %B, <8 x float> %C)
7676
store <8 x float> %tmp0, ptr addrspace(1) %out, align 32
7777
ret void
7878
}
@@ -93,18 +93,18 @@ bb:
9393
ret void
9494
}
9595

96-
; CHECK: DIVERGENT: %tmp0 = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.v8i32(i1 false, <4 x i32> %A, i1 false, <4 x i32> %B, <8 x i32> %C, i1 false)
96+
; CHECK: DIVERGENT: %tmp0 = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v4i32(i1 false, <4 x i32> %A, i1 false, <4 x i32> %B, <8 x i32> %C, i1 false)
9797
define amdgpu_kernel void @wmma_i32_16x16x16_ui8(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) {
9898
bb:
99-
%tmp0 = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.v8i32(i1 false, <4 x i32> %A, i1 false, <4 x i32> %B, <8 x i32> %C, i1 false)
99+
%tmp0 = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v4i32(i1 false, <4 x i32> %A, i1 false, <4 x i32> %B, <8 x i32> %C, i1 false)
100100
store <8 x i32> %tmp0, ptr addrspace(1) %out, align 32
101101
ret void
102102
}
103103

104-
; CHECK: DIVERGENT: %tmp0 = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v2i32.v8i32(i1 false, <2 x i32> %A, i1 false, <2 x i32> %B, <8 x i32> %C, i1 false)
104+
; CHECK: DIVERGENT: %tmp0 = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.v2i32(i1 false, <2 x i32> %A, i1 false, <2 x i32> %B, <8 x i32> %C, i1 false)
105105
define amdgpu_kernel void @wmma_i32_16x16x16_ui4(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) {
106106
bb:
107-
%tmp0 = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v2i32.v8i32(i1 false, <2 x i32> %A, i1 false, <2 x i32> %B, <8 x i32> %C, i1 false)
107+
%tmp0 = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.v2i32(i1 false, <2 x i32> %A, i1 false, <2 x i32> %B, <8 x i32> %C, i1 false)
108108
store <8 x i32> %tmp0, ptr addrspace(1) %out, align 32
109109
ret void
110110
}
@@ -207,12 +207,12 @@ declare i32 @llvm.amdgcn.mov.dpp.i32(i32, i32, i32, i32, i1) #1
207207
declare i32 @llvm.amdgcn.mov.dpp8.i32(i32, i32) #1
208208
declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1) #1
209209
declare i32 @llvm.amdgcn.writelane(i32, i32, i32) #1
210-
declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v16f16.v8f32(<16 x half>, <16 x half> , <8 x float>) #1
211-
declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v16i16.v8f32(<16 x i16>, <16 x i16> , <8 x float>) #1
210+
declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v16f16(<16 x half>, <16 x half> , <8 x float>) #1
211+
declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v16i16(<16 x i16>, <16 x i16> , <8 x float>) #1
212212
declare <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v16f16.v16f16(<16 x half>, <16 x half> , <16 x half>, i1 immarg) #1
213213
declare <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v16i16.v16i16(<16 x i16>, <16 x i16> , <16 x i16>, i1 immarg) #1
214-
declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.v8i32(i1 immarg, <4 x i32>, i1 immarg, <4 x i32> , <8 x i32>, i1 immarg) #1
215-
declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v2i32.v8i32(i1 immarg, <2 x i32>, i1 immarg, <2 x i32> , <8 x i32>, i1 immarg) #1
214+
declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v4i32(i1 immarg, <4 x i32>, i1 immarg, <4 x i32> , <8 x i32>, i1 immarg) #1
215+
declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.v2i32(i1 immarg, <2 x i32>, i1 immarg, <2 x i32> , <8 x i32>, i1 immarg) #1
216216
declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16(<8 x half>, <16 x half>, <8 x float>, i16)
217217
declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16(<8 x i16>, <16 x i16>, <8 x float>, i16)
218218
declare <8 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16(<8 x half>, <16 x half>, <8 x half>, i16)

llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -529,10 +529,10 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC_pack(<8 x half> %A, <8 x
529529
; GFX12-NEXT: s_clause 0x1
530530
; GFX12-NEXT: flat_load_b128 v[12:15], v[8:9]
531531
; GFX12-NEXT: flat_load_b128 v[16:19], v[8:9] offset:16
532-
; GFX12-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
532+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x101
533533
; GFX12-NEXT: v_and_b32_e32 v8, 0xffff, v12
534534
; GFX12-NEXT: v_and_b32_e32 v9, 0xffff, v14
535-
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
535+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
536536
; GFX12-NEXT: v_and_b32_e32 v14, 0xffff, v16
537537
; GFX12-NEXT: v_and_b32_e32 v16, 0xffff, v18
538538
; GFX12-NEXT: v_lshl_or_b32 v12, v13, 16, v8

llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-swmmac-index_key.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ define amdgpu_ps void @test_swmmac_f32_16x16x32_f16_index_key(<8 x half> %A, <16
99
; GFX12-NEXT: v_dual_mov_b32 v31, v17 :: v_dual_mov_b32 v30, v16
1010
; GFX12-NEXT: v_dual_mov_b32 v29, v15 :: v_dual_mov_b32 v28, v14
1111
; GFX12-NEXT: v_dual_mov_b32 v27, v13 :: v_dual_mov_b32 v26, v12
12-
; GFX12-NEXT: s_waitcnt vmcnt(0)
12+
; GFX12-NEXT: s_wait_loadcnt 0x0
1313
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
1414
; GFX12-NEXT: v_swmmac_f32_16x16x32_f16 v[26:33], v[0:3], v[4:11], v20
1515
; GFX12-NEXT: v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 index_key:1
@@ -89,7 +89,7 @@ define amdgpu_ps void @test_swmmac_f32_16x16x32_bf16_index_key(<8 x bfloat> %A,
8989
; GFX12-NEXT: v_or_b32_e32 v31, v35, v46
9090
; GFX12-NEXT: v_or_b32_e32 v32, v36, v47
9191
; GFX12-NEXT: v_or_b32_e32 v33, v37, v48
92-
; GFX12-NEXT: s_waitcnt vmcnt(0)
92+
; GFX12-NEXT: s_wait_loadcnt 0x0
9393
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
9494
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf16 v[0:7], v[8:11], v[26:33], v20
9595
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf16 v[12:19], v[8:11], v[26:33], v20 index_key:1
@@ -119,7 +119,7 @@ define amdgpu_ps void @test_swmmac_f16_16x16x32_f16_index_key(<8 x half> %A, <16
119119
; GFX12-NEXT: global_load_b32 v16, v[16:17], off
120120
; GFX12-NEXT: v_dual_mov_b32 v25, v15 :: v_dual_mov_b32 v24, v14
121121
; GFX12-NEXT: v_dual_mov_b32 v23, v13 :: v_dual_mov_b32 v22, v12
122-
; GFX12-NEXT: s_waitcnt vmcnt(0)
122+
; GFX12-NEXT: s_wait_loadcnt 0x0
123123
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
124124
; GFX12-NEXT: v_swmmac_f16_16x16x32_f16 v[22:25], v[0:3], v[4:11], v16
125125
; GFX12-NEXT: v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 index_key:1
@@ -209,7 +209,7 @@ define amdgpu_ps void @test_swmmac_bf16_16x16x32_bf16_index_key(<8 x bfloat> %A,
209209
; GFX12-NEXT: v_or_b32_e32 v6, v31, v35
210210
; GFX12-NEXT: v_or_b32_e32 v7, v32, v36
211211
; GFX12-NEXT: v_dual_mov_b32 v24, v14 :: v_dual_mov_b32 v23, v13
212-
; GFX12-NEXT: s_waitcnt vmcnt(0)
212+
; GFX12-NEXT: s_wait_loadcnt 0x0
213213
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
214214
; GFX12-NEXT: v_swmmac_bf16_16x16x32_bf16 v[22:25], v[8:11], v[0:7], v16
215215
; GFX12-NEXT: v_swmmac_bf16_16x16x32_bf16 v[12:15], v[8:11], v[0:7], v16 index_key:1
@@ -237,7 +237,7 @@ define amdgpu_ps void @test_swmmac_i32_16x16x32_iu8_index_key(<2 x i32> %A, <4 x
237237
; GFX12-NEXT: v_dual_mov_b32 v25, v11 :: v_dual_mov_b32 v24, v10
238238
; GFX12-NEXT: v_dual_mov_b32 v23, v9 :: v_dual_mov_b32 v22, v8
239239
; GFX12-NEXT: v_dual_mov_b32 v21, v7 :: v_dual_mov_b32 v20, v6
240-
; GFX12-NEXT: s_waitcnt vmcnt(0)
240+
; GFX12-NEXT: s_wait_loadcnt 0x0
241241
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
242242
; GFX12-NEXT: v_swmmac_i32_16x16x32_iu8 v[20:27], v[0:1], v[2:5], v14
243243
; GFX12-NEXT: v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 index_key:1
@@ -269,7 +269,7 @@ define amdgpu_ps void @test_swmmac_i32_16x16x32_iu4_index_key(i32 %A, <2 x i32>
269269
; GFX12-NEXT: v_dual_mov_b32 v22, v8 :: v_dual_mov_b32 v21, v7
270270
; GFX12-NEXT: v_dual_mov_b32 v20, v6 :: v_dual_mov_b32 v19, v5
271271
; GFX12-NEXT: v_dual_mov_b32 v18, v4 :: v_dual_mov_b32 v17, v3
272-
; GFX12-NEXT: s_waitcnt vmcnt(0)
272+
; GFX12-NEXT: s_wait_loadcnt 0x0
273273
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
274274
; GFX12-NEXT: v_swmmac_i32_16x16x32_iu4 v[17:24], v0, v[1:2], v11
275275
; GFX12-NEXT: v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 index_key:1
@@ -301,7 +301,7 @@ define amdgpu_ps void @test_swmmac_f32_16x16x32_fp8_fp8_index_key(<2 x i32> %A,
301301
; GFX12-NEXT: v_dual_mov_b32 v25, v11 :: v_dual_mov_b32 v24, v10
302302
; GFX12-NEXT: v_dual_mov_b32 v23, v9 :: v_dual_mov_b32 v22, v8
303303
; GFX12-NEXT: v_dual_mov_b32 v21, v7 :: v_dual_mov_b32 v20, v6
304-
; GFX12-NEXT: s_waitcnt vmcnt(0)
304+
; GFX12-NEXT: s_wait_loadcnt 0x0
305305
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
306306
; GFX12-NEXT: v_swmmac_f32_16x16x32_fp8_fp8 v[20:27], v[0:1], v[2:5], v14
307307
; GFX12-NEXT: v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], v14 index_key:1
@@ -333,7 +333,7 @@ define amdgpu_ps void @test_swmmac_f32_16x16x32_fp8_bf8_index_key(<2 x i32> %A,
333333
; GFX12-NEXT: v_dual_mov_b32 v25, v11 :: v_dual_mov_b32 v24, v10
334334
; GFX12-NEXT: v_dual_mov_b32 v23, v9 :: v_dual_mov_b32 v22, v8
335335
; GFX12-NEXT: v_dual_mov_b32 v21, v7 :: v_dual_mov_b32 v20, v6
336-
; GFX12-NEXT: s_waitcnt vmcnt(0)
336+
; GFX12-NEXT: s_wait_loadcnt 0x0
337337
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
338338
; GFX12-NEXT: v_swmmac_f32_16x16x32_fp8_bf8 v[20:27], v[0:1], v[2:5], v14
339339
; GFX12-NEXT: v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], v14 index_key:1
@@ -365,7 +365,7 @@ define amdgpu_ps void @test_swmmac_f32_16x16x32_bf8_fp8_index_key(<2 x i32> %A,
365365
; GFX12-NEXT: v_dual_mov_b32 v25, v11 :: v_dual_mov_b32 v24, v10
366366
; GFX12-NEXT: v_dual_mov_b32 v23, v9 :: v_dual_mov_b32 v22, v8
367367
; GFX12-NEXT: v_dual_mov_b32 v21, v7 :: v_dual_mov_b32 v20, v6
368-
; GFX12-NEXT: s_waitcnt vmcnt(0)
368+
; GFX12-NEXT: s_wait_loadcnt 0x0
369369
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
370370
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf8_fp8 v[20:27], v[0:1], v[2:5], v14
371371
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], v14 index_key:1
@@ -397,7 +397,7 @@ define amdgpu_ps void @test_swmmac_f32_16x16x32_bf8_bf8_index_key(<2 x i32> %A,
397397
; GFX12-NEXT: v_dual_mov_b32 v25, v11 :: v_dual_mov_b32 v24, v10
398398
; GFX12-NEXT: v_dual_mov_b32 v23, v9 :: v_dual_mov_b32 v22, v8
399399
; GFX12-NEXT: v_dual_mov_b32 v21, v7 :: v_dual_mov_b32 v20, v6
400-
; GFX12-NEXT: s_waitcnt vmcnt(0)
400+
; GFX12-NEXT: s_wait_loadcnt 0x0
401401
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
402402
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf8_bf8 v[20:27], v[0:1], v[2:5], v14
403403
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], v14 index_key:1

llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -457,7 +457,7 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC_pack(<4 x half> %A, <4 x
457457
; GFX12-LABEL: test_wmma_f16_16x16x16_f16_negC_pack:
458458
; GFX12: ; %bb.0: ; %bb
459459
; GFX12-NEXT: flat_load_b128 v[8:11], v[4:5]
460-
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
460+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
461461
; GFX12-NEXT: v_and_b32_e32 v4, 0xffff, v8
462462
; GFX12-NEXT: v_and_b32_e32 v5, 0xffff, v10
463463
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)

llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-swmmac-index_key.ll

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ define amdgpu_ps void @test_swmmac_f32_16x16x32_f16_index_key(<4 x half> %A, <8
1717
; GFX12-NEXT: v_mov_b32_e32 v30, v8
1818
; GFX12-NEXT: v_mov_b32_e32 v29, v7
1919
; GFX12-NEXT: v_mov_b32_e32 v28, v6
20-
; GFX12-NEXT: s_waitcnt vmcnt(0)
20+
; GFX12-NEXT: s_wait_loadcnt 0x0
2121
; GFX12-NEXT: v_swmmac_f32_16x16x32_f16 v[20:23], v[0:1], v[2:5], v10
2222
; GFX12-NEXT: v_swmmac_f32_16x16x32_f16 v[24:27], v[0:1], v[2:5], v10 index_key:1
2323
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
@@ -87,7 +87,7 @@ define amdgpu_ps void @test_swmmac_f32_16x16x32_bf16_index_key(<4 x bfloat> %A,
8787
; GFX12-NEXT: v_mov_b32_e32 v30, v8
8888
; GFX12-NEXT: v_mov_b32_e32 v29, v7
8989
; GFX12-NEXT: v_mov_b32_e32 v28, v6
90-
; GFX12-NEXT: s_waitcnt vmcnt(0)
90+
; GFX12-NEXT: s_wait_loadcnt 0x0
9191
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf16 v[20:23], v[4:5], v[0:3], v10
9292
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf16 v[24:27], v[4:5], v[0:3], v10 index_key:1
9393
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
@@ -127,7 +127,7 @@ define amdgpu_ps void @test_swmmac_f16_16x16x32_f16_index_key(<4 x half> %A, <8
127127
; GFX12-NEXT: v_mov_b32_e32 v18, v6
128128
; GFX12-NEXT: v_mov_b32_e32 v21, v7
129129
; GFX12-NEXT: v_mov_b32_e32 v20, v6
130-
; GFX12-NEXT: s_waitcnt vmcnt(0)
130+
; GFX12-NEXT: s_wait_loadcnt 0x0
131131
; GFX12-NEXT: v_swmmac_f16_16x16x32_f16 v[8:9], v[0:1], v[2:5], v22
132132
; GFX12-NEXT: v_swmmac_f16_16x16x32_f16 v[18:19], v[0:1], v[2:5], v22 index_key:1
133133
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
@@ -199,7 +199,7 @@ define amdgpu_ps void @test_swmmac_bf16_16x16x32_bf16_index_key(<4 x bfloat> %A,
199199
; GFX12-NEXT: v_mov_b32_e32 v18, v6
200200
; GFX12-NEXT: v_mov_b32_e32 v21, v7
201201
; GFX12-NEXT: v_mov_b32_e32 v20, v6
202-
; GFX12-NEXT: s_waitcnt vmcnt(0)
202+
; GFX12-NEXT: s_wait_loadcnt 0x0
203203
; GFX12-NEXT: v_swmmac_bf16_16x16x32_bf16 v[8:9], v[4:5], v[0:3], v22
204204
; GFX12-NEXT: v_swmmac_bf16_16x16x32_bf16 v[18:19], v[4:5], v[0:3], v22 index_key:1
205205
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
@@ -245,7 +245,7 @@ define amdgpu_ps void @test_swmmac_i32_16x16x32_iu8_index_key(i32 %A, <2 x i32>
245245
; GFX12-NEXT: v_mov_b32_e32 v27, v5
246246
; GFX12-NEXT: v_mov_b32_e32 v26, v4
247247
; GFX12-NEXT: v_mov_b32_e32 v25, v3
248-
; GFX12-NEXT: s_waitcnt vmcnt(0)
248+
; GFX12-NEXT: s_wait_loadcnt 0x0
249249
; GFX12-NEXT: v_swmmac_i32_16x16x32_iu8 v[17:20], v0, v[1:2], v7
250250
; GFX12-NEXT: v_swmmac_i32_16x16x32_iu8 v[21:24], v0, v[1:2], v7 index_key:1
251251
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
@@ -283,7 +283,7 @@ define amdgpu_ps void @test_swmmac_i32_16x16x32_iu4_index_key(i32 %A, i32 %B, <4
283283
; GFX12-NEXT: v_mov_b32_e32 v14, v4
284284
; GFX12-NEXT: v_mov_b32_e32 v13, v3
285285
; GFX12-NEXT: v_mov_b32_e32 v12, v2
286-
; GFX12-NEXT: s_waitcnt vmcnt(0)
286+
; GFX12-NEXT: s_wait_loadcnt 0x0
287287
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
288288
; GFX12-NEXT: v_swmmac_i32_16x16x32_iu4 v[12:15], v0, v1, v6
289289
; GFX12-NEXT: v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 index_key:1
@@ -311,7 +311,7 @@ define amdgpu_ps void @test_swmmac_i32_16x16x64_iu4_index_key(i32 %A, <2 x i32>
311311
; GFX12-NEXT: v_mov_b32_e32 v15, v5
312312
; GFX12-NEXT: v_mov_b32_e32 v14, v4
313313
; GFX12-NEXT: v_mov_b32_e32 v13, v3
314-
; GFX12-NEXT: s_waitcnt vmcnt(0)
314+
; GFX12-NEXT: s_wait_loadcnt 0x0
315315
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
316316
; GFX12-NEXT: v_swmmac_i32_16x16x64_iu4 v[13:16], v0, v[1:2], v7
317317
; GFX12-NEXT: v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 index_key:1
@@ -347,7 +347,7 @@ define amdgpu_ps void @test_swmmac_f32_16x16x32_fp8_fp8_index_key(i32 %A, <2 x i
347347
; GFX12-NEXT: v_mov_b32_e32 v27, v5
348348
; GFX12-NEXT: v_mov_b32_e32 v26, v4
349349
; GFX12-NEXT: v_mov_b32_e32 v25, v3
350-
; GFX12-NEXT: s_waitcnt vmcnt(0)
350+
; GFX12-NEXT: s_wait_loadcnt 0x0
351351
; GFX12-NEXT: v_swmmac_f32_16x16x32_fp8_fp8 v[17:20], v0, v[1:2], v7
352352
; GFX12-NEXT: v_swmmac_f32_16x16x32_fp8_fp8 v[21:24], v0, v[1:2], v7 index_key:1
353353
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
@@ -393,7 +393,7 @@ define amdgpu_ps void @test_swmmac_f32_16x16x32_fp8_bf8_index_key(i32 %A, <2 x i
393393
; GFX12-NEXT: v_mov_b32_e32 v27, v5
394394
; GFX12-NEXT: v_mov_b32_e32 v26, v4
395395
; GFX12-NEXT: v_mov_b32_e32 v25, v3
396-
; GFX12-NEXT: s_waitcnt vmcnt(0)
396+
; GFX12-NEXT: s_wait_loadcnt 0x0
397397
; GFX12-NEXT: v_swmmac_f32_16x16x32_fp8_bf8 v[17:20], v0, v[1:2], v7
398398
; GFX12-NEXT: v_swmmac_f32_16x16x32_fp8_bf8 v[21:24], v0, v[1:2], v7 index_key:1
399399
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
@@ -439,7 +439,7 @@ define amdgpu_ps void @test_swmmac_f32_16x16x32_bf8_fp8_index_key(i32 %A, <2 x i
439439
; GFX12-NEXT: v_mov_b32_e32 v27, v5
440440
; GFX12-NEXT: v_mov_b32_e32 v26, v4
441441
; GFX12-NEXT: v_mov_b32_e32 v25, v3
442-
; GFX12-NEXT: s_waitcnt vmcnt(0)
442+
; GFX12-NEXT: s_wait_loadcnt 0x0
443443
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf8_fp8 v[17:20], v0, v[1:2], v7
444444
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf8_fp8 v[21:24], v0, v[1:2], v7 index_key:1
445445
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
@@ -485,7 +485,7 @@ define amdgpu_ps void @test_swmmac_f32_16x16x32_bf8_bf8_index_key(i32 %A, <2 x i
485485
; GFX12-NEXT: v_mov_b32_e32 v27, v5
486486
; GFX12-NEXT: v_mov_b32_e32 v26, v4
487487
; GFX12-NEXT: v_mov_b32_e32 v25, v3
488-
; GFX12-NEXT: s_waitcnt vmcnt(0)
488+
; GFX12-NEXT: s_wait_loadcnt 0x0
489489
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf8_bf8 v[17:20], v0, v[1:2], v7
490490
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf8_bf8 v[21:24], v0, v[1:2], v7 index_key:1
491491
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)

llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -463,10 +463,10 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC_pack(<8 x half> %A, <8 x
463463
; GFX12-NEXT: s_clause 0x1
464464
; GFX12-NEXT: flat_load_b128 v[12:15], v[8:9] offset:16
465465
; GFX12-NEXT: flat_load_b128 v[16:19], v[8:9]
466-
; GFX12-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
466+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x101
467467
; GFX12-NEXT: v_perm_b32 v15, v15, v14, 0x5040100
468468
; GFX12-NEXT: v_perm_b32 v14, v13, v12, 0x5040100
469-
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
469+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
470470
; GFX12-NEXT: v_perm_b32 v13, v19, v18, 0x5040100
471471
; GFX12-NEXT: v_perm_b32 v12, v17, v16, 0x5040100
472472
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)

0 commit comments

Comments
 (0)