@@ -263,3 +263,48 @@ define amdgpu_kernel void @fold_mad64(ptr addrspace(1) %p) {
263
263
store float 1 .0 , ptr addrspace (1 ) %p1
264
264
ret void
265
265
}
266
+
267
+ ; Use non-zero shift amounts in v_lshl_add_u64.
268
+ define ptr @select_v_lshl_add_u64 (ptr %base , i64 %voffset ) { ; returns %base advanced by %voffset elements of type i64
269
+ ; GFX942_PTRADD-LABEL: select_v_lshl_add_u64:
270
+ ; GFX942_PTRADD: ; %bb.0:
271
+ ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
272
+ ; GFX942_PTRADD-NEXT: v_lshlrev_b64 v[2:3], 3, v[2:3]
273
+ ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
274
+ ; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
275
+ ;
276
+ ; GFX942_LEGACY-LABEL: select_v_lshl_add_u64:
277
+ ; GFX942_LEGACY: ; %bb.0:
278
+ ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
279
+ ; GFX942_LEGACY-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 3, v[0:1]
280
+ ; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
281
+ %gep = getelementptr inbounds i64 , ptr %base , i64 %voffset ; i64 elements are 8 bytes wide, so the byte offset is %voffset << 3 (the shift amount 3 seen in the checks above)
282
+ ret ptr %gep ; NOTE(review): the LEGACY checks fold the shift into one v_lshl_add_u64; the PTRADD checks show a separate v_lshlrev_b64 followed by an add with shift amount 0
283
+ }
284
+
285
+ ; Fold mul and add into v_mad, even if amdgpu-codegenprepare-mul24 turned the
286
+ ; mul into a mul24.
287
+ define ptr @fold_mul24_into_mad (ptr %base , i64 %a , i64 %b ) { ; returns %base advanced by (%a & 0xfffff) * (%b & 0xfffff) bytes
288
+ ; GFX942_PTRADD-LABEL: fold_mul24_into_mad:
289
+ ; GFX942_PTRADD: ; %bb.0:
290
+ ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
291
+ ; GFX942_PTRADD-NEXT: v_and_b32_e32 v2, 0xfffff, v2
292
+ ; GFX942_PTRADD-NEXT: v_and_b32_e32 v4, 0xfffff, v4
293
+ ; GFX942_PTRADD-NEXT: v_mul_hi_u32_u24_e32 v3, v2, v4
294
+ ; GFX942_PTRADD-NEXT: v_mul_u32_u24_e32 v2, v2, v4
295
+ ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
296
+ ; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
297
+ ;
298
+ ; GFX942_LEGACY-LABEL: fold_mul24_into_mad:
299
+ ; GFX942_LEGACY: ; %bb.0:
300
+ ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
301
+ ; GFX942_LEGACY-NEXT: v_and_b32_e32 v2, 0xfffff, v2
302
+ ; GFX942_LEGACY-NEXT: v_and_b32_e32 v3, 0xfffff, v4
303
+ ; GFX942_LEGACY-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, v3, v[0:1]
304
+ ; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
305
+ %a_masked = and i64 %a , u0xfffff ; keep only the low 20 bits of %a
306
+ %b_masked = and i64 %b , u0xfffff ; keep only the low 20 bits of %b
307
+ %mul = mul i64 %a_masked , %b_masked ; both factors fit in 24 bits; presumably this is what allows the mul to become a mul24 as described in the test comment - TODO confirm
308
+ %gep = getelementptr inbounds i8 , ptr %base , i64 %mul ; i8 element type, so %mul is used directly as the byte offset
309
+ ret ptr %gep ; NOTE(review): the LEGACY checks show a single v_mad_u64_u32; the PTRADD checks show v_mul_u32_u24 and v_mul_hi_u32_u24 followed by a v_lshl_add_u64 with shift amount 0
310
+ }
0 commit comments