@@ -20,7 +20,7 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align4(i32 %n) {
20
20
; GFX9-NEXT: v_mov_b32_e32 v1, s4
21
21
; GFX9-NEXT: s_lshl_b32 s5, s5, 6
22
22
; GFX9-NEXT: s_mov_b32 s33, 0
23
- ; GFX9-NEXT: s_add_i32 s32, s4, s5
23
+ ; GFX9-NEXT: s_add_u32 s32, s4, s5
24
24
; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
25
25
; GFX9-NEXT: s_endpgm
26
26
;
@@ -39,7 +39,7 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align4(i32 %n) {
39
39
; GFX10-NEXT: s_lshl2_add_u32 s5, s5, 15
40
40
; GFX10-NEXT: s_and_b32 s5, s5, -16
41
41
; GFX10-NEXT: s_lshl_b32 s5, s5, 5
42
- ; GFX10-NEXT: s_add_i32 s32, s4, s5
42
+ ; GFX10-NEXT: s_add_u32 s32, s4, s5
43
43
; GFX10-NEXT: s_endpgm
44
44
;
45
45
; GFX11-LABEL: kernel_dynamic_stackalloc_sgpr_align4:
@@ -56,7 +56,7 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align4(i32 %n) {
56
56
; GFX11-NEXT: s_and_b32 s1, s1, -16
57
57
; GFX11-NEXT: s_lshl_b32 s1, s1, 5
58
58
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
59
- ; GFX11-NEXT: s_add_i32 s32, s0, s1
59
+ ; GFX11-NEXT: s_add_u32 s32, s0, s1
60
60
; GFX11-NEXT: s_endpgm
61
61
%alloca = alloca i32 , i32 %n , align 4 , addrspace (5 )
62
62
store i32 0 , ptr addrspace (5 ) %alloca
@@ -84,7 +84,7 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
84
84
; GFX9-NEXT: s_lshl2_add_u32 s4, s4, 15
85
85
; GFX9-NEXT: s_and_b32 s4, s4, -16
86
86
; GFX9-NEXT: s_lshl_b32 s4, s4, 6
87
- ; GFX9-NEXT: s_add_i32 s32, s6, s4
87
+ ; GFX9-NEXT: s_add_u32 s32, s6, s4
88
88
; GFX9-NEXT: s_mov_b32 s32, s33
89
89
; GFX9-NEXT: s_mov_b32 s33, s7
90
90
; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -110,7 +110,7 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
110
110
; GFX10-NEXT: s_lshl2_add_u32 s4, s4, 15
111
111
; GFX10-NEXT: s_and_b32 s4, s4, -16
112
112
; GFX10-NEXT: s_lshl_b32 s4, s4, 5
113
- ; GFX10-NEXT: s_add_i32 s32, s6, s4
113
+ ; GFX10-NEXT: s_add_u32 s32, s6, s4
114
114
; GFX10-NEXT: s_mov_b32 s32, s33
115
115
; GFX10-NEXT: s_mov_b32 s33, s7
116
116
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -136,7 +136,7 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
136
136
; GFX11-NEXT: s_and_b32 s0, s0, -16
137
137
; GFX11-NEXT: s_lshl_b32 s0, s0, 5
138
138
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
139
- ; GFX11-NEXT: s_add_i32 s32, s2, s0
139
+ ; GFX11-NEXT: s_add_u32 s32, s2, s0
140
140
; GFX11-NEXT: s_mov_b32 s32, s33
141
141
; GFX11-NEXT: s_mov_b32 s33, s3
142
142
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -161,7 +161,7 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align16(i32 %n) {
161
161
; GFX9-NEXT: v_mov_b32_e32 v1, s4
162
162
; GFX9-NEXT: s_lshl_b32 s5, s5, 6
163
163
; GFX9-NEXT: s_mov_b32 s33, 0
164
- ; GFX9-NEXT: s_add_i32 s32, s4, s5
164
+ ; GFX9-NEXT: s_add_u32 s32, s4, s5
165
165
; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
166
166
; GFX9-NEXT: s_endpgm
167
167
;
@@ -180,7 +180,7 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align16(i32 %n) {
180
180
; GFX10-NEXT: s_lshl2_add_u32 s5, s5, 15
181
181
; GFX10-NEXT: s_and_b32 s5, s5, -16
182
182
; GFX10-NEXT: s_lshl_b32 s5, s5, 5
183
- ; GFX10-NEXT: s_add_i32 s32, s4, s5
183
+ ; GFX10-NEXT: s_add_u32 s32, s4, s5
184
184
; GFX10-NEXT: s_endpgm
185
185
;
186
186
; GFX11-LABEL: kernel_dynamic_stackalloc_sgpr_align16:
@@ -197,7 +197,7 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align16(i32 %n) {
197
197
; GFX11-NEXT: s_and_b32 s1, s1, -16
198
198
; GFX11-NEXT: s_lshl_b32 s1, s1, 5
199
199
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
200
- ; GFX11-NEXT: s_add_i32 s32, s0, s1
200
+ ; GFX11-NEXT: s_add_u32 s32, s0, s1
201
201
; GFX11-NEXT: s_endpgm
202
202
%alloca = alloca i32 , i32 %n , align 16 , addrspace (5 )
203
203
store i32 0 , ptr addrspace (5 ) %alloca
@@ -225,7 +225,7 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
225
225
; GFX9-NEXT: s_lshl2_add_u32 s4, s4, 15
226
226
; GFX9-NEXT: s_and_b32 s4, s4, -16
227
227
; GFX9-NEXT: s_lshl_b32 s4, s4, 6
228
- ; GFX9-NEXT: s_add_i32 s32, s6, s4
228
+ ; GFX9-NEXT: s_add_u32 s32, s6, s4
229
229
; GFX9-NEXT: s_mov_b32 s32, s33
230
230
; GFX9-NEXT: s_mov_b32 s33, s7
231
231
; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -251,7 +251,7 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
251
251
; GFX10-NEXT: s_lshl2_add_u32 s4, s4, 15
252
252
; GFX10-NEXT: s_and_b32 s4, s4, -16
253
253
; GFX10-NEXT: s_lshl_b32 s4, s4, 5
254
- ; GFX10-NEXT: s_add_i32 s32, s6, s4
254
+ ; GFX10-NEXT: s_add_u32 s32, s6, s4
255
255
; GFX10-NEXT: s_mov_b32 s32, s33
256
256
; GFX10-NEXT: s_mov_b32 s33, s7
257
257
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -277,7 +277,7 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
277
277
; GFX11-NEXT: s_and_b32 s0, s0, -16
278
278
; GFX11-NEXT: s_lshl_b32 s0, s0, 5
279
279
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
280
- ; GFX11-NEXT: s_add_i32 s32, s2, s0
280
+ ; GFX11-NEXT: s_add_u32 s32, s2, s0
281
281
; GFX11-NEXT: s_mov_b32 s32, s33
282
282
; GFX11-NEXT: s_mov_b32 s33, s3
283
283
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -294,7 +294,7 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align32(i32 %n) {
294
294
; GFX9-NEXT: s_movk_i32 s32, 0x800
295
295
; GFX9-NEXT: s_add_u32 s0, s0, s17
296
296
; GFX9-NEXT: s_addc_u32 s1, s1, 0
297
- ; GFX9-NEXT: s_add_i32 s5, s32, 0x7ff
297
+ ; GFX9-NEXT: s_add_u32 s5, s32, 0x7ff
298
298
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
299
299
; GFX9-NEXT: s_lshl2_add_u32 s4, s4, 15
300
300
; GFX9-NEXT: s_and_b32 s5, s5, 0xfffff800
@@ -303,7 +303,7 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align32(i32 %n) {
303
303
; GFX9-NEXT: v_mov_b32_e32 v1, s5
304
304
; GFX9-NEXT: s_lshl_b32 s4, s4, 6
305
305
; GFX9-NEXT: s_mov_b32 s33, 0
306
- ; GFX9-NEXT: s_add_i32 s32, s5, s4
306
+ ; GFX9-NEXT: s_add_u32 s32, s5, s4
307
307
; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
308
308
; GFX9-NEXT: s_endpgm
309
309
;
@@ -313,7 +313,7 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align32(i32 %n) {
313
313
; GFX10-NEXT: s_movk_i32 s32, 0x400
314
314
; GFX10-NEXT: s_add_u32 s0, s0, s17
315
315
; GFX10-NEXT: s_addc_u32 s1, s1, 0
316
- ; GFX10-NEXT: s_add_i32 s5, s32, 0x3ff
316
+ ; GFX10-NEXT: s_add_u32 s5, s32, 0x3ff
317
317
; GFX10-NEXT: v_mov_b32_e32 v0, 0
318
318
; GFX10-NEXT: s_and_b32 s5, s5, 0xfffffc00
319
319
; GFX10-NEXT: s_mov_b32 s33, 0
@@ -323,15 +323,15 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align32(i32 %n) {
323
323
; GFX10-NEXT: s_lshl2_add_u32 s4, s4, 15
324
324
; GFX10-NEXT: s_and_b32 s4, s4, -16
325
325
; GFX10-NEXT: s_lshl_b32 s4, s4, 5
326
- ; GFX10-NEXT: s_add_i32 s32, s5, s4
326
+ ; GFX10-NEXT: s_add_u32 s32, s5, s4
327
327
; GFX10-NEXT: s_endpgm
328
328
;
329
329
; GFX11-LABEL: kernel_dynamic_stackalloc_sgpr_align32:
330
330
; GFX11: ; %bb.0:
331
331
; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x0
332
332
; GFX11-NEXT: s_mov_b32 s32, 32
333
333
; GFX11-NEXT: v_mov_b32_e32 v0, 0
334
- ; GFX11-NEXT: s_add_i32 s1, s32, 0x3ff
334
+ ; GFX11-NEXT: s_add_u32 s1, s32, 0x3ff
335
335
; GFX11-NEXT: s_mov_b32 s33, 0
336
336
; GFX11-NEXT: s_and_b32 s1, s1, 0xfffffc00
337
337
; GFX11-NEXT: scratch_store_b32 off, v0, s1
@@ -341,7 +341,7 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align32(i32 %n) {
341
341
; GFX11-NEXT: s_and_b32 s0, s0, -16
342
342
; GFX11-NEXT: s_lshl_b32 s0, s0, 5
343
343
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
344
- ; GFX11-NEXT: s_add_i32 s32, s1, s0
344
+ ; GFX11-NEXT: s_add_u32 s32, s1, s0
345
345
; GFX11-NEXT: s_endpgm
346
346
%alloca = alloca i32 , i32 %n , align 32 , addrspace (5 )
347
347
store i32 0 , ptr addrspace (5 ) %alloca
@@ -366,15 +366,15 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
366
366
; GFX9-NEXT: s_mov_b32 s33, s6
367
367
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
368
368
; GFX9-NEXT: s_load_dword s4, s[4:5], 0x0
369
- ; GFX9-NEXT: s_add_i32 s5, s32, 0x7ff
369
+ ; GFX9-NEXT: s_add_u32 s5, s32, 0x7ff
370
370
; GFX9-NEXT: s_and_b32 s5, s5, 0xfffff800
371
371
; GFX9-NEXT: v_mov_b32_e32 v1, s5
372
372
; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
373
373
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
374
374
; GFX9-NEXT: s_lshl2_add_u32 s4, s4, 15
375
375
; GFX9-NEXT: s_and_b32 s4, s4, -16
376
376
; GFX9-NEXT: s_lshl_b32 s4, s4, 6
377
- ; GFX9-NEXT: s_add_i32 s32, s5, s4
377
+ ; GFX9-NEXT: s_add_u32 s32, s5, s4
378
378
; GFX9-NEXT: s_mov_b32 s32, s34
379
379
; GFX9-NEXT: s_mov_b32 s34, s7
380
380
; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -397,15 +397,15 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
397
397
; GFX10-NEXT: s_mov_b32 s33, s6
398
398
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
399
399
; GFX10-NEXT: s_load_dword s4, s[4:5], 0x0
400
- ; GFX10-NEXT: s_add_i32 s5, s32, 0x3ff
400
+ ; GFX10-NEXT: s_add_u32 s5, s32, 0x3ff
401
401
; GFX10-NEXT: s_and_b32 s5, s5, 0xfffffc00
402
402
; GFX10-NEXT: v_mov_b32_e32 v1, s5
403
403
; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
404
404
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
405
405
; GFX10-NEXT: s_lshl2_add_u32 s4, s4, 15
406
406
; GFX10-NEXT: s_and_b32 s4, s4, -16
407
407
; GFX10-NEXT: s_lshl_b32 s4, s4, 5
408
- ; GFX10-NEXT: s_add_i32 s32, s5, s4
408
+ ; GFX10-NEXT: s_add_u32 s32, s5, s4
409
409
; GFX10-NEXT: s_mov_b32 s32, s34
410
410
; GFX10-NEXT: s_mov_b32 s34, s7
411
411
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -427,7 +427,7 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
427
427
; GFX11-NEXT: s_mov_b32 s33, s2
428
428
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
429
429
; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0
430
- ; GFX11-NEXT: s_add_i32 s1, s32, 0x3ff
430
+ ; GFX11-NEXT: s_add_u32 s1, s32, 0x3ff
431
431
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
432
432
; GFX11-NEXT: s_and_b32 s1, s1, 0xfffffc00
433
433
; GFX11-NEXT: scratch_store_b32 off, v0, s1
@@ -436,7 +436,7 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
436
436
; GFX11-NEXT: s_and_b32 s0, s0, -16
437
437
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
438
438
; GFX11-NEXT: s_lshl_b32 s0, s0, 5
439
- ; GFX11-NEXT: s_add_i32 s32, s1, s0
439
+ ; GFX11-NEXT: s_add_u32 s32, s1, s0
440
440
; GFX11-NEXT: s_mov_b32 s32, s34
441
441
; GFX11-NEXT: s_mov_b32 s34, s3
442
442
; GFX11-NEXT: s_setpc_b64 s[30:31]
0 commit comments