@@ -9,7 +9,7 @@ define amdgpu_ps void @test_swmmac_f32_16x16x32_f16_index_key(<8 x half> %A, <16
9
9
; GFX12-NEXT: v_dual_mov_b32 v31, v17 :: v_dual_mov_b32 v30, v16
10
10
; GFX12-NEXT: v_dual_mov_b32 v29, v15 :: v_dual_mov_b32 v28, v14
11
11
; GFX12-NEXT: v_dual_mov_b32 v27, v13 :: v_dual_mov_b32 v26, v12
12
- ; GFX12-NEXT: s_waitcnt vmcnt(0)
12
+ ; GFX12-NEXT: s_wait_loadcnt 0x0
13
13
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
14
14
; GFX12-NEXT: v_swmmac_f32_16x16x32_f16 v[26:33], v[0:3], v[4:11], v20
15
15
; GFX12-NEXT: v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 index_key:1
@@ -89,7 +89,7 @@ define amdgpu_ps void @test_swmmac_f32_16x16x32_bf16_index_key(<8 x bfloat> %A,
89
89
; GFX12-NEXT: v_or_b32_e32 v31, v35, v46
90
90
; GFX12-NEXT: v_or_b32_e32 v32, v36, v47
91
91
; GFX12-NEXT: v_or_b32_e32 v33, v37, v48
92
- ; GFX12-NEXT: s_waitcnt vmcnt(0)
92
+ ; GFX12-NEXT: s_wait_loadcnt 0x0
93
93
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
94
94
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf16 v[0:7], v[8:11], v[26:33], v20
95
95
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf16 v[12:19], v[8:11], v[26:33], v20 index_key:1
@@ -119,7 +119,7 @@ define amdgpu_ps void @test_swmmac_f16_16x16x32_f16_index_key(<8 x half> %A, <16
119
119
; GFX12-NEXT: global_load_b32 v16, v[16:17], off
120
120
; GFX12-NEXT: v_dual_mov_b32 v25, v15 :: v_dual_mov_b32 v24, v14
121
121
; GFX12-NEXT: v_dual_mov_b32 v23, v13 :: v_dual_mov_b32 v22, v12
122
- ; GFX12-NEXT: s_waitcnt vmcnt(0)
122
+ ; GFX12-NEXT: s_wait_loadcnt 0x0
123
123
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
124
124
; GFX12-NEXT: v_swmmac_f16_16x16x32_f16 v[22:25], v[0:3], v[4:11], v16
125
125
; GFX12-NEXT: v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 index_key:1
@@ -209,7 +209,7 @@ define amdgpu_ps void @test_swmmac_bf16_16x16x32_bf16_index_key(<8 x bfloat> %A,
209
209
; GFX12-NEXT: v_or_b32_e32 v6, v31, v35
210
210
; GFX12-NEXT: v_or_b32_e32 v7, v32, v36
211
211
; GFX12-NEXT: v_dual_mov_b32 v24, v14 :: v_dual_mov_b32 v23, v13
212
- ; GFX12-NEXT: s_waitcnt vmcnt(0)
212
+ ; GFX12-NEXT: s_wait_loadcnt 0x0
213
213
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
214
214
; GFX12-NEXT: v_swmmac_bf16_16x16x32_bf16 v[22:25], v[8:11], v[0:7], v16
215
215
; GFX12-NEXT: v_swmmac_bf16_16x16x32_bf16 v[12:15], v[8:11], v[0:7], v16 index_key:1
@@ -237,7 +237,7 @@ define amdgpu_ps void @test_swmmac_i32_16x16x32_iu8_index_key(<2 x i32> %A, <4 x
237
237
; GFX12-NEXT: v_dual_mov_b32 v25, v11 :: v_dual_mov_b32 v24, v10
238
238
; GFX12-NEXT: v_dual_mov_b32 v23, v9 :: v_dual_mov_b32 v22, v8
239
239
; GFX12-NEXT: v_dual_mov_b32 v21, v7 :: v_dual_mov_b32 v20, v6
240
- ; GFX12-NEXT: s_waitcnt vmcnt(0)
240
+ ; GFX12-NEXT: s_wait_loadcnt 0x0
241
241
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
242
242
; GFX12-NEXT: v_swmmac_i32_16x16x32_iu8 v[20:27], v[0:1], v[2:5], v14
243
243
; GFX12-NEXT: v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 index_key:1
@@ -269,7 +269,7 @@ define amdgpu_ps void @test_swmmac_i32_16x16x32_iu4_index_key(i32 %A, <2 x i32>
269
269
; GFX12-NEXT: v_dual_mov_b32 v22, v8 :: v_dual_mov_b32 v21, v7
270
270
; GFX12-NEXT: v_dual_mov_b32 v20, v6 :: v_dual_mov_b32 v19, v5
271
271
; GFX12-NEXT: v_dual_mov_b32 v18, v4 :: v_dual_mov_b32 v17, v3
272
- ; GFX12-NEXT: s_waitcnt vmcnt(0)
272
+ ; GFX12-NEXT: s_wait_loadcnt 0x0
273
273
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
274
274
; GFX12-NEXT: v_swmmac_i32_16x16x32_iu4 v[17:24], v0, v[1:2], v11
275
275
; GFX12-NEXT: v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 index_key:1
@@ -301,7 +301,7 @@ define amdgpu_ps void @test_swmmac_f32_16x16x32_fp8_fp8_index_key(<2 x i32> %A,
301
301
; GFX12-NEXT: v_dual_mov_b32 v25, v11 :: v_dual_mov_b32 v24, v10
302
302
; GFX12-NEXT: v_dual_mov_b32 v23, v9 :: v_dual_mov_b32 v22, v8
303
303
; GFX12-NEXT: v_dual_mov_b32 v21, v7 :: v_dual_mov_b32 v20, v6
304
- ; GFX12-NEXT: s_waitcnt vmcnt(0)
304
+ ; GFX12-NEXT: s_wait_loadcnt 0x0
305
305
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
306
306
; GFX12-NEXT: v_swmmac_f32_16x16x32_fp8_fp8 v[20:27], v[0:1], v[2:5], v14
307
307
; GFX12-NEXT: v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], v14 index_key:1
@@ -333,7 +333,7 @@ define amdgpu_ps void @test_swmmac_f32_16x16x32_fp8_bf8_index_key(<2 x i32> %A,
333
333
; GFX12-NEXT: v_dual_mov_b32 v25, v11 :: v_dual_mov_b32 v24, v10
334
334
; GFX12-NEXT: v_dual_mov_b32 v23, v9 :: v_dual_mov_b32 v22, v8
335
335
; GFX12-NEXT: v_dual_mov_b32 v21, v7 :: v_dual_mov_b32 v20, v6
336
- ; GFX12-NEXT: s_waitcnt vmcnt(0)
336
+ ; GFX12-NEXT: s_wait_loadcnt 0x0
337
337
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
338
338
; GFX12-NEXT: v_swmmac_f32_16x16x32_fp8_bf8 v[20:27], v[0:1], v[2:5], v14
339
339
; GFX12-NEXT: v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], v14 index_key:1
@@ -365,7 +365,7 @@ define amdgpu_ps void @test_swmmac_f32_16x16x32_bf8_fp8_index_key(<2 x i32> %A,
365
365
; GFX12-NEXT: v_dual_mov_b32 v25, v11 :: v_dual_mov_b32 v24, v10
366
366
; GFX12-NEXT: v_dual_mov_b32 v23, v9 :: v_dual_mov_b32 v22, v8
367
367
; GFX12-NEXT: v_dual_mov_b32 v21, v7 :: v_dual_mov_b32 v20, v6
368
- ; GFX12-NEXT: s_waitcnt vmcnt(0)
368
+ ; GFX12-NEXT: s_wait_loadcnt 0x0
369
369
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
370
370
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf8_fp8 v[20:27], v[0:1], v[2:5], v14
371
371
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], v14 index_key:1
@@ -397,7 +397,7 @@ define amdgpu_ps void @test_swmmac_f32_16x16x32_bf8_bf8_index_key(<2 x i32> %A,
397
397
; GFX12-NEXT: v_dual_mov_b32 v25, v11 :: v_dual_mov_b32 v24, v10
398
398
; GFX12-NEXT: v_dual_mov_b32 v23, v9 :: v_dual_mov_b32 v22, v8
399
399
; GFX12-NEXT: v_dual_mov_b32 v21, v7 :: v_dual_mov_b32 v20, v6
400
- ; GFX12-NEXT: s_waitcnt vmcnt(0)
400
+ ; GFX12-NEXT: s_wait_loadcnt 0x0
401
401
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
402
402
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf8_bf8 v[20:27], v[0:1], v[2:5], v14
403
403
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], v14 index_key:1
0 commit comments