Skip to content

Commit b1a78b2

Browse files
committed
[AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns
Pre-committing tests to show improvements in a follow-up PR.
1 parent 88860bc commit b1a78b2

File tree

1 file changed

+45
-0
lines changed

1 file changed

+45
-0
lines changed

llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,3 +263,48 @@ define amdgpu_kernel void @fold_mad64(ptr addrspace(1) %p) {
263263
store float 1.0, ptr addrspace(1) %p1
264264
ret void
265265
}
266+
267+
; Use non-zero shift amounts in v_lshl_add_u64.
268+
define ptr @select_v_lshl_add_u64(ptr %base, i64 %voffset) {
269+
; GFX942_PTRADD-LABEL: select_v_lshl_add_u64:
270+
; GFX942_PTRADD: ; %bb.0:
271+
; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
272+
; GFX942_PTRADD-NEXT: v_lshlrev_b64 v[2:3], 3, v[2:3]
273+
; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
274+
; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
275+
;
276+
; GFX942_LEGACY-LABEL: select_v_lshl_add_u64:
277+
; GFX942_LEGACY: ; %bb.0:
278+
; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
279+
; GFX942_LEGACY-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 3, v[0:1]
280+
; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
281+
; Index %base as an array of i64 with a variable index, so the address is
; %base plus %voffset scaled by the 8-byte element size (a shift by 3).
; The LEGACY checks above select one v_lshl_add_u64 with shift amount 3.
; The PTRADD checks currently show a separate v_lshlrev_b64 by 3 followed by
; a v_lshl_add_u64 with shift amount 0, i.e. the shift is not folded there.
%gep = getelementptr inbounds i64, ptr %base, i64 %voffset
282+
ret ptr %gep
283+
}
284+
285+
; Fold mul and add into v_mad, even if amdgpu-codegenprepare-mul24 turned the
286+
; mul into a mul24.
287+
define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) {
288+
; GFX942_PTRADD-LABEL: fold_mul24_into_mad:
289+
; GFX942_PTRADD: ; %bb.0:
290+
; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
291+
; GFX942_PTRADD-NEXT: v_and_b32_e32 v2, 0xfffff, v2
292+
; GFX942_PTRADD-NEXT: v_and_b32_e32 v4, 0xfffff, v4
293+
; GFX942_PTRADD-NEXT: v_mul_hi_u32_u24_e32 v3, v2, v4
294+
; GFX942_PTRADD-NEXT: v_mul_u32_u24_e32 v2, v2, v4
295+
; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
296+
; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
297+
;
298+
; GFX942_LEGACY-LABEL: fold_mul24_into_mad:
299+
; GFX942_LEGACY: ; %bb.0:
300+
; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
301+
; GFX942_LEGACY-NEXT: v_and_b32_e32 v2, 0xfffff, v2
302+
; GFX942_LEGACY-NEXT: v_and_b32_e32 v3, 0xfffff, v4
303+
; GFX942_LEGACY-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, v3, v[0:1]
304+
; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
305+
; Mask both multiplicands to their low 20 bits (0xfffff) before multiplying.
; Presumably this is what makes the multiply eligible for the mul24 rewrite
; named in the comment above the function; confirm against the pass.
%a_masked = and i64 %a, u0xfffff
306+
%b_masked = and i64 %b, u0xfffff
307+
%mul = mul i64 %a_masked, %b_masked
308+
; Use the product as a byte offset from %base (i8 element type, no scaling).
; The LEGACY checks above fold the multiply and the pointer add into a single
; v_mad_u64_u32. The PTRADD checks currently show v_mul_hi_u32_u24 and
; v_mul_u32_u24 followed by a v_lshl_add_u64 with shift amount 0 instead.
%gep = getelementptr inbounds i8, ptr %base, i64 %mul
309+
ret ptr %gep
310+
}

0 commit comments

Comments
 (0)