Skip to content

Commit 63f76a4

Browse files
committed
Disable PTRADD for 32-bit pointers and remove relevant changes
1 parent 1083cdd commit 63f76a4

28 files changed

+539
-1158
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,8 @@ static cl::opt<bool> UseDivergentRegisterIndexing(
6565
// the SelectionDAG.
6666
static cl::opt<bool> UseSelectionDAGPTRADD(
6767
"amdgpu-use-sdag-ptradd", cl::Hidden,
68-
cl::desc("Generate ISD::PTRADD nodes in the SelectionDAG ISel"),
68+
cl::desc("Generate ISD::PTRADD nodes for 64-bit pointer arithmetic in the "
69+
"SelectionDAG ISel"),
6970
cl::init(false));
7071

7172
static bool denormalModeIsFlushAllF32(const MachineFunction &MF) {
@@ -10428,7 +10429,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
1042810429

1042910430
bool SITargetLowering::shouldPreservePtrArith(const Function &F,
1043010431
EVT PtrVT) const {
10431-
return UseSelectionDAGPTRADD;
10432+
return UseSelectionDAGPTRADD && PtrVT == MVT::i64;
1043210433
}
1043310434

1043410435
// The raw.(t)buffer and struct.(t)buffer intrinsics have two offset args:

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 0 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -1390,25 +1390,6 @@ def : GCNPat<
13901390
(S_ADD_U64_PSEUDO $src0, $src1)>;
13911391
}
13921392

1393-
def : GCNPat<
1394-
(DivergentBinFrag<ptradd> i32:$src0, i32:$src1),
1395-
(V_ADD_U32_e64 $src0, $src1, 0)> {
1396-
let SubtargetPredicate = HasAddNoCarryInsts;
1397-
}
1398-
1399-
def : GCNPat<
1400-
(DivergentBinFrag<ptradd> i32:$src0, i32:$src1),
1401-
(V_ADD_CO_U32_e64 $src0, $src1)> {
1402-
let SubtargetPredicate = NotHasAddNoCarryInsts;
1403-
}
1404-
1405-
// Whether we select S_ADD_I32 or S_ADD_U32 does not make much of a
1406-
// difference. Most notably, S_ADD_I32 instructions can be transformed
1407-
// to S_ADDK_I32, so we select that.
1408-
def : GCNPat<
1409-
(UniformBinFrag<ptradd> i32:$src0, i32:$src1),
1410-
(S_ADD_I32 $src0, $src1)>;
1411-
14121393
/********** ============================================ **********/
14131394
/********** Extraction, Insertion, Building and Casting **********/
14141395
/********** ============================================ **********/

llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll

Lines changed: 22 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -36,15 +36,15 @@ define amdgpu_kernel void @kernel_caller_stack() {
3636
; FLATSCR-NEXT: s_mov_b32 s32, 0
3737
; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13
3838
; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
39-
; FLATSCR-NEXT: s_add_i32 s0, s32, 4
39+
; FLATSCR-NEXT: s_add_u32 s0, s32, 4
4040
; FLATSCR-NEXT: v_mov_b32_e32 v0, 9
4141
; FLATSCR-NEXT: scratch_store_dword off, v0, s0
42-
; FLATSCR-NEXT: s_add_i32 s0, s32, 8
42+
; FLATSCR-NEXT: s_add_u32 s0, s32, 8
4343
; FLATSCR-NEXT: v_mov_b32_e32 v0, 10
4444
; FLATSCR-NEXT: scratch_store_dword off, v0, s0
45-
; FLATSCR-NEXT: s_add_i32 s0, s32, 12
45+
; FLATSCR-NEXT: s_add_u32 s0, s32, 12
4646
; FLATSCR-NEXT: v_mov_b32_e32 v0, 11
47-
; FLATSCR-NEXT: s_add_i32 s2, s32, 16
47+
; FLATSCR-NEXT: s_add_u32 s2, s32, 16
4848
; FLATSCR-NEXT: scratch_store_dword off, v0, s0
4949
; FLATSCR-NEXT: v_mov_b32_e32 v0, 12
5050
; FLATSCR-NEXT: s_getpc_b64 s[0:1]
@@ -189,13 +189,13 @@ define amdgpu_kernel void @kernel_caller_byval() {
189189
; FLATSCR-NEXT: s_getpc_b64 s[0:1]
190190
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_byval@rel32@lo+4
191191
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_byval@rel32@hi+12
192-
; FLATSCR-NEXT: s_add_i32 s2, s32, 8
193-
; FLATSCR-NEXT: s_add_i32 s3, s32, 16
194-
; FLATSCR-NEXT: s_add_i32 s4, s32, 24
195-
; FLATSCR-NEXT: s_add_i32 s5, s32, 32
196-
; FLATSCR-NEXT: s_add_i32 s6, s32, 40
197-
; FLATSCR-NEXT: s_add_i32 s7, s32, 48
198-
; FLATSCR-NEXT: s_add_i32 s8, s32, 56
192+
; FLATSCR-NEXT: s_add_u32 s2, s32, 8
193+
; FLATSCR-NEXT: s_add_u32 s3, s32, 16
194+
; FLATSCR-NEXT: s_add_u32 s4, s32, 24
195+
; FLATSCR-NEXT: s_add_u32 s5, s32, 32
196+
; FLATSCR-NEXT: s_add_u32 s6, s32, 40
197+
; FLATSCR-NEXT: s_add_u32 s7, s32, 48
198+
; FLATSCR-NEXT: s_add_u32 s8, s32, 56
199199
; FLATSCR-NEXT: s_waitcnt vmcnt(7)
200200
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s32
201201
; FLATSCR-NEXT: s_waitcnt vmcnt(7)
@@ -266,16 +266,16 @@ define void @func_caller_stack() {
266266
; FLATSCR-NEXT: s_mov_b64 exec, s[2:3]
267267
; FLATSCR-NEXT: s_add_i32 s32, s32, 16
268268
; FLATSCR-NEXT: v_writelane_b32 v40, s0, 2
269-
; FLATSCR-NEXT: s_add_i32 s0, s32, 4
269+
; FLATSCR-NEXT: s_add_u32 s0, s32, 4
270270
; FLATSCR-NEXT: v_mov_b32_e32 v0, 9
271271
; FLATSCR-NEXT: scratch_store_dword off, v0, s0
272-
; FLATSCR-NEXT: s_add_i32 s0, s32, 8
272+
; FLATSCR-NEXT: s_add_u32 s0, s32, 8
273273
; FLATSCR-NEXT: v_mov_b32_e32 v0, 10
274274
; FLATSCR-NEXT: scratch_store_dword off, v0, s0
275-
; FLATSCR-NEXT: s_add_i32 s0, s32, 12
275+
; FLATSCR-NEXT: s_add_u32 s0, s32, 12
276276
; FLATSCR-NEXT: v_mov_b32_e32 v0, 11
277277
; FLATSCR-NEXT: scratch_store_dword off, v0, s0
278-
; FLATSCR-NEXT: s_add_i32 s0, s32, 16
278+
; FLATSCR-NEXT: s_add_u32 s0, s32, 16
279279
; FLATSCR-NEXT: v_mov_b32_e32 v0, 12
280280
; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0
281281
; FLATSCR-NEXT: scratch_store_dword off, v0, s0
@@ -393,8 +393,8 @@ define void @func_caller_byval(ptr addrspace(5) %argptr) {
393393
; FLATSCR-NEXT: s_add_i32 s32, s32, 16
394394
; FLATSCR-NEXT: v_add_u32_e32 v3, 8, v0
395395
; FLATSCR-NEXT: v_writelane_b32 v40, s0, 2
396-
; FLATSCR-NEXT: s_add_i32 s0, s32, 8
397-
; FLATSCR-NEXT: s_add_i32 s2, s32, 56
396+
; FLATSCR-NEXT: s_add_u32 s0, s32, 8
397+
; FLATSCR-NEXT: s_add_u32 s2, s32, 56
398398
; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0
399399
; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1
400400
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
@@ -404,28 +404,28 @@ define void @func_caller_byval(ptr addrspace(5) %argptr) {
404404
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
405405
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[1:2], s0
406406
; FLATSCR-NEXT: scratch_load_dwordx2 v[1:2], v3, off
407-
; FLATSCR-NEXT: s_add_i32 s0, s32, 16
407+
; FLATSCR-NEXT: s_add_u32 s0, s32, 16
408408
; FLATSCR-NEXT: v_add_u32_e32 v3, 24, v0
409409
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
410410
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[1:2], s0
411411
; FLATSCR-NEXT: scratch_load_dwordx2 v[1:2], v3, off
412-
; FLATSCR-NEXT: s_add_i32 s0, s32, 24
412+
; FLATSCR-NEXT: s_add_u32 s0, s32, 24
413413
; FLATSCR-NEXT: v_add_u32_e32 v3, 32, v0
414414
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
415415
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[1:2], s0
416416
; FLATSCR-NEXT: scratch_load_dwordx2 v[1:2], v3, off
417-
; FLATSCR-NEXT: s_add_i32 s0, s32, 32
417+
; FLATSCR-NEXT: s_add_u32 s0, s32, 32
418418
; FLATSCR-NEXT: v_add_u32_e32 v3, 40, v0
419419
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
420420
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[1:2], s0
421421
; FLATSCR-NEXT: scratch_load_dwordx2 v[1:2], v3, off
422-
; FLATSCR-NEXT: s_add_i32 s0, s32, 40
422+
; FLATSCR-NEXT: s_add_u32 s0, s32, 40
423423
; FLATSCR-NEXT: v_add_u32_e32 v3, 48, v0
424424
; FLATSCR-NEXT: v_add_u32_e32 v0, 56, v0
425425
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
426426
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[1:2], s0
427427
; FLATSCR-NEXT: scratch_load_dwordx2 v[1:2], v3, off
428-
; FLATSCR-NEXT: s_add_i32 s0, s32, 48
428+
; FLATSCR-NEXT: s_add_u32 s0, s32, 48
429429
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
430430
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[1:2], s0
431431
; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], v0, off

llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll

Lines changed: 24 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align4(i32 %n) {
2020
; GFX9-NEXT: v_mov_b32_e32 v1, s4
2121
; GFX9-NEXT: s_lshl_b32 s5, s5, 6
2222
; GFX9-NEXT: s_mov_b32 s33, 0
23-
; GFX9-NEXT: s_add_i32 s32, s4, s5
23+
; GFX9-NEXT: s_add_u32 s32, s4, s5
2424
; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
2525
; GFX9-NEXT: s_endpgm
2626
;
@@ -39,7 +39,7 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align4(i32 %n) {
3939
; GFX10-NEXT: s_lshl2_add_u32 s5, s5, 15
4040
; GFX10-NEXT: s_and_b32 s5, s5, -16
4141
; GFX10-NEXT: s_lshl_b32 s5, s5, 5
42-
; GFX10-NEXT: s_add_i32 s32, s4, s5
42+
; GFX10-NEXT: s_add_u32 s32, s4, s5
4343
; GFX10-NEXT: s_endpgm
4444
;
4545
; GFX11-LABEL: kernel_dynamic_stackalloc_sgpr_align4:
@@ -56,7 +56,7 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align4(i32 %n) {
5656
; GFX11-NEXT: s_and_b32 s1, s1, -16
5757
; GFX11-NEXT: s_lshl_b32 s1, s1, 5
5858
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
59-
; GFX11-NEXT: s_add_i32 s32, s0, s1
59+
; GFX11-NEXT: s_add_u32 s32, s0, s1
6060
; GFX11-NEXT: s_endpgm
6161
%alloca = alloca i32, i32 %n, align 4, addrspace(5)
6262
store i32 0, ptr addrspace(5) %alloca
@@ -84,7 +84,7 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
8484
; GFX9-NEXT: s_lshl2_add_u32 s4, s4, 15
8585
; GFX9-NEXT: s_and_b32 s4, s4, -16
8686
; GFX9-NEXT: s_lshl_b32 s4, s4, 6
87-
; GFX9-NEXT: s_add_i32 s32, s6, s4
87+
; GFX9-NEXT: s_add_u32 s32, s6, s4
8888
; GFX9-NEXT: s_mov_b32 s32, s33
8989
; GFX9-NEXT: s_mov_b32 s33, s7
9090
; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -110,7 +110,7 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
110110
; GFX10-NEXT: s_lshl2_add_u32 s4, s4, 15
111111
; GFX10-NEXT: s_and_b32 s4, s4, -16
112112
; GFX10-NEXT: s_lshl_b32 s4, s4, 5
113-
; GFX10-NEXT: s_add_i32 s32, s6, s4
113+
; GFX10-NEXT: s_add_u32 s32, s6, s4
114114
; GFX10-NEXT: s_mov_b32 s32, s33
115115
; GFX10-NEXT: s_mov_b32 s33, s7
116116
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -136,7 +136,7 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
136136
; GFX11-NEXT: s_and_b32 s0, s0, -16
137137
; GFX11-NEXT: s_lshl_b32 s0, s0, 5
138138
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
139-
; GFX11-NEXT: s_add_i32 s32, s2, s0
139+
; GFX11-NEXT: s_add_u32 s32, s2, s0
140140
; GFX11-NEXT: s_mov_b32 s32, s33
141141
; GFX11-NEXT: s_mov_b32 s33, s3
142142
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -161,7 +161,7 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align16(i32 %n) {
161161
; GFX9-NEXT: v_mov_b32_e32 v1, s4
162162
; GFX9-NEXT: s_lshl_b32 s5, s5, 6
163163
; GFX9-NEXT: s_mov_b32 s33, 0
164-
; GFX9-NEXT: s_add_i32 s32, s4, s5
164+
; GFX9-NEXT: s_add_u32 s32, s4, s5
165165
; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
166166
; GFX9-NEXT: s_endpgm
167167
;
@@ -180,7 +180,7 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align16(i32 %n) {
180180
; GFX10-NEXT: s_lshl2_add_u32 s5, s5, 15
181181
; GFX10-NEXT: s_and_b32 s5, s5, -16
182182
; GFX10-NEXT: s_lshl_b32 s5, s5, 5
183-
; GFX10-NEXT: s_add_i32 s32, s4, s5
183+
; GFX10-NEXT: s_add_u32 s32, s4, s5
184184
; GFX10-NEXT: s_endpgm
185185
;
186186
; GFX11-LABEL: kernel_dynamic_stackalloc_sgpr_align16:
@@ -197,7 +197,7 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align16(i32 %n) {
197197
; GFX11-NEXT: s_and_b32 s1, s1, -16
198198
; GFX11-NEXT: s_lshl_b32 s1, s1, 5
199199
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
200-
; GFX11-NEXT: s_add_i32 s32, s0, s1
200+
; GFX11-NEXT: s_add_u32 s32, s0, s1
201201
; GFX11-NEXT: s_endpgm
202202
%alloca = alloca i32, i32 %n, align 16, addrspace(5)
203203
store i32 0, ptr addrspace(5) %alloca
@@ -225,7 +225,7 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
225225
; GFX9-NEXT: s_lshl2_add_u32 s4, s4, 15
226226
; GFX9-NEXT: s_and_b32 s4, s4, -16
227227
; GFX9-NEXT: s_lshl_b32 s4, s4, 6
228-
; GFX9-NEXT: s_add_i32 s32, s6, s4
228+
; GFX9-NEXT: s_add_u32 s32, s6, s4
229229
; GFX9-NEXT: s_mov_b32 s32, s33
230230
; GFX9-NEXT: s_mov_b32 s33, s7
231231
; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -251,7 +251,7 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
251251
; GFX10-NEXT: s_lshl2_add_u32 s4, s4, 15
252252
; GFX10-NEXT: s_and_b32 s4, s4, -16
253253
; GFX10-NEXT: s_lshl_b32 s4, s4, 5
254-
; GFX10-NEXT: s_add_i32 s32, s6, s4
254+
; GFX10-NEXT: s_add_u32 s32, s6, s4
255255
; GFX10-NEXT: s_mov_b32 s32, s33
256256
; GFX10-NEXT: s_mov_b32 s33, s7
257257
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -277,7 +277,7 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
277277
; GFX11-NEXT: s_and_b32 s0, s0, -16
278278
; GFX11-NEXT: s_lshl_b32 s0, s0, 5
279279
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
280-
; GFX11-NEXT: s_add_i32 s32, s2, s0
280+
; GFX11-NEXT: s_add_u32 s32, s2, s0
281281
; GFX11-NEXT: s_mov_b32 s32, s33
282282
; GFX11-NEXT: s_mov_b32 s33, s3
283283
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -294,7 +294,7 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align32(i32 %n) {
294294
; GFX9-NEXT: s_movk_i32 s32, 0x800
295295
; GFX9-NEXT: s_add_u32 s0, s0, s17
296296
; GFX9-NEXT: s_addc_u32 s1, s1, 0
297-
; GFX9-NEXT: s_add_i32 s5, s32, 0x7ff
297+
; GFX9-NEXT: s_add_u32 s5, s32, 0x7ff
298298
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
299299
; GFX9-NEXT: s_lshl2_add_u32 s4, s4, 15
300300
; GFX9-NEXT: s_and_b32 s5, s5, 0xfffff800
@@ -303,7 +303,7 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align32(i32 %n) {
303303
; GFX9-NEXT: v_mov_b32_e32 v1, s5
304304
; GFX9-NEXT: s_lshl_b32 s4, s4, 6
305305
; GFX9-NEXT: s_mov_b32 s33, 0
306-
; GFX9-NEXT: s_add_i32 s32, s5, s4
306+
; GFX9-NEXT: s_add_u32 s32, s5, s4
307307
; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
308308
; GFX9-NEXT: s_endpgm
309309
;
@@ -313,7 +313,7 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align32(i32 %n) {
313313
; GFX10-NEXT: s_movk_i32 s32, 0x400
314314
; GFX10-NEXT: s_add_u32 s0, s0, s17
315315
; GFX10-NEXT: s_addc_u32 s1, s1, 0
316-
; GFX10-NEXT: s_add_i32 s5, s32, 0x3ff
316+
; GFX10-NEXT: s_add_u32 s5, s32, 0x3ff
317317
; GFX10-NEXT: v_mov_b32_e32 v0, 0
318318
; GFX10-NEXT: s_and_b32 s5, s5, 0xfffffc00
319319
; GFX10-NEXT: s_mov_b32 s33, 0
@@ -323,15 +323,15 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align32(i32 %n) {
323323
; GFX10-NEXT: s_lshl2_add_u32 s4, s4, 15
324324
; GFX10-NEXT: s_and_b32 s4, s4, -16
325325
; GFX10-NEXT: s_lshl_b32 s4, s4, 5
326-
; GFX10-NEXT: s_add_i32 s32, s5, s4
326+
; GFX10-NEXT: s_add_u32 s32, s5, s4
327327
; GFX10-NEXT: s_endpgm
328328
;
329329
; GFX11-LABEL: kernel_dynamic_stackalloc_sgpr_align32:
330330
; GFX11: ; %bb.0:
331331
; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x0
332332
; GFX11-NEXT: s_mov_b32 s32, 32
333333
; GFX11-NEXT: v_mov_b32_e32 v0, 0
334-
; GFX11-NEXT: s_add_i32 s1, s32, 0x3ff
334+
; GFX11-NEXT: s_add_u32 s1, s32, 0x3ff
335335
; GFX11-NEXT: s_mov_b32 s33, 0
336336
; GFX11-NEXT: s_and_b32 s1, s1, 0xfffffc00
337337
; GFX11-NEXT: scratch_store_b32 off, v0, s1
@@ -341,7 +341,7 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align32(i32 %n) {
341341
; GFX11-NEXT: s_and_b32 s0, s0, -16
342342
; GFX11-NEXT: s_lshl_b32 s0, s0, 5
343343
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
344-
; GFX11-NEXT: s_add_i32 s32, s1, s0
344+
; GFX11-NEXT: s_add_u32 s32, s1, s0
345345
; GFX11-NEXT: s_endpgm
346346
%alloca = alloca i32, i32 %n, align 32, addrspace(5)
347347
store i32 0, ptr addrspace(5) %alloca
@@ -366,15 +366,15 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
366366
; GFX9-NEXT: s_mov_b32 s33, s6
367367
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
368368
; GFX9-NEXT: s_load_dword s4, s[4:5], 0x0
369-
; GFX9-NEXT: s_add_i32 s5, s32, 0x7ff
369+
; GFX9-NEXT: s_add_u32 s5, s32, 0x7ff
370370
; GFX9-NEXT: s_and_b32 s5, s5, 0xfffff800
371371
; GFX9-NEXT: v_mov_b32_e32 v1, s5
372372
; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
373373
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
374374
; GFX9-NEXT: s_lshl2_add_u32 s4, s4, 15
375375
; GFX9-NEXT: s_and_b32 s4, s4, -16
376376
; GFX9-NEXT: s_lshl_b32 s4, s4, 6
377-
; GFX9-NEXT: s_add_i32 s32, s5, s4
377+
; GFX9-NEXT: s_add_u32 s32, s5, s4
378378
; GFX9-NEXT: s_mov_b32 s32, s34
379379
; GFX9-NEXT: s_mov_b32 s34, s7
380380
; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -397,15 +397,15 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
397397
; GFX10-NEXT: s_mov_b32 s33, s6
398398
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
399399
; GFX10-NEXT: s_load_dword s4, s[4:5], 0x0
400-
; GFX10-NEXT: s_add_i32 s5, s32, 0x3ff
400+
; GFX10-NEXT: s_add_u32 s5, s32, 0x3ff
401401
; GFX10-NEXT: s_and_b32 s5, s5, 0xfffffc00
402402
; GFX10-NEXT: v_mov_b32_e32 v1, s5
403403
; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
404404
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
405405
; GFX10-NEXT: s_lshl2_add_u32 s4, s4, 15
406406
; GFX10-NEXT: s_and_b32 s4, s4, -16
407407
; GFX10-NEXT: s_lshl_b32 s4, s4, 5
408-
; GFX10-NEXT: s_add_i32 s32, s5, s4
408+
; GFX10-NEXT: s_add_u32 s32, s5, s4
409409
; GFX10-NEXT: s_mov_b32 s32, s34
410410
; GFX10-NEXT: s_mov_b32 s34, s7
411411
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -427,7 +427,7 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
427427
; GFX11-NEXT: s_mov_b32 s33, s2
428428
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
429429
; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0
430-
; GFX11-NEXT: s_add_i32 s1, s32, 0x3ff
430+
; GFX11-NEXT: s_add_u32 s1, s32, 0x3ff
431431
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
432432
; GFX11-NEXT: s_and_b32 s1, s1, 0xfffffc00
433433
; GFX11-NEXT: scratch_store_b32 off, v0, s1
@@ -436,7 +436,7 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
436436
; GFX11-NEXT: s_and_b32 s0, s0, -16
437437
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
438438
; GFX11-NEXT: s_lshl_b32 s0, s0, 5
439-
; GFX11-NEXT: s_add_i32 s32, s1, s0
439+
; GFX11-NEXT: s_add_u32 s32, s1, s0
440440
; GFX11-NEXT: s_mov_b32 s32, s34
441441
; GFX11-NEXT: s_mov_b32 s34, s3
442442
; GFX11-NEXT: s_setpc_b64 s[30:31]

0 commit comments

Comments
 (0)