Skip to content

Commit 07e7869

Browse files
committed
[NVPTX] add f32x2 version of fp-contract test
1 parent 6c142c8 commit 07e7869

File tree

1 file changed

+112
-0
lines changed

1 file changed

+112
-0
lines changed
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,FAST
3+
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100 | FileCheck %s --check-prefixes=CHECK,DEFAULT
4+
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100 -fp-contract=fast | %ptxas-verify %}
5+
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100 | %ptxas-verify %}
6+
7+
target triple = "nvptx64-unknown-cuda"
8+
9+
;; FAST-LABEL: @t0
10+
;; DEFAULT-LABEL: @t0
11+
;; Unflagged fmul whose result feeds an unflagged fadd. The checks below expect
;; a single fma.rn.f32x2 in the -fp-contract=fast run, and a separate
;; mul.rn.f32x2 / add.rn.f32x2 pair in the run without that option.
define <2 x float> @t0(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
12+
; FAST-LABEL: t0(
13+
; FAST: {
14+
; FAST-NEXT: .reg .b64 %rd<5>;
15+
; FAST-EMPTY:
16+
; FAST-NEXT: // %bb.0:
17+
; FAST-NEXT: ld.param.b64 %rd1, [t0_param_2];
18+
; FAST-NEXT: ld.param.b64 %rd2, [t0_param_1];
19+
; FAST-NEXT: ld.param.b64 %rd3, [t0_param_0];
20+
; FAST-NEXT: fma.rn.f32x2 %rd4, %rd3, %rd2, %rd1;
21+
; FAST-NEXT: st.param.b64 [func_retval0], %rd4;
22+
; FAST-NEXT: ret;
23+
;
24+
; DEFAULT-LABEL: t0(
25+
; DEFAULT: {
26+
; DEFAULT-NEXT: .reg .b64 %rd<6>;
27+
; DEFAULT-EMPTY:
28+
; DEFAULT-NEXT: // %bb.0:
29+
; DEFAULT-NEXT: ld.param.b64 %rd1, [t0_param_2];
30+
; DEFAULT-NEXT: ld.param.b64 %rd2, [t0_param_1];
31+
; DEFAULT-NEXT: ld.param.b64 %rd3, [t0_param_0];
32+
; DEFAULT-NEXT: mul.rn.f32x2 %rd4, %rd3, %rd2;
33+
; DEFAULT-NEXT: add.rn.f32x2 %rd5, %rd4, %rd1;
34+
; DEFAULT-NEXT: st.param.b64 [func_retval0], %rd5;
35+
; DEFAULT-NEXT: ret;
36+
%v0 = fmul <2 x float> %a, %b
37+
%v1 = fadd <2 x float> %v0, %c
38+
ret <2 x float> %v1
39+
}
40+
41+
;; We cannot form an fma here, but make sure that without -fp-contract=fast we
42+
;; explicitly emit add.rn.f32x2 to prevent ptxas from fusing this with anything else.
43+
;; Computes (a + b) * (a - b) with no fast-math flags on any instruction. The
;; fmul is the final operation, so no fadd or fsub consumes a product. The two
;; runs are expected to differ only in the .rn suffix on add, sub and mul.
define <2 x float> @t1(<2 x float> %a, <2 x float> %b) {
44+
; FAST-LABEL: t1(
45+
; FAST: {
46+
; FAST-NEXT: .reg .b64 %rd<6>;
47+
; FAST-EMPTY:
48+
; FAST-NEXT: // %bb.0:
49+
; FAST-NEXT: ld.param.b64 %rd1, [t1_param_1];
50+
; FAST-NEXT: ld.param.b64 %rd2, [t1_param_0];
51+
; FAST-NEXT: add.f32x2 %rd3, %rd2, %rd1;
52+
; FAST-NEXT: sub.f32x2 %rd4, %rd2, %rd1;
53+
; FAST-NEXT: mul.f32x2 %rd5, %rd3, %rd4;
54+
; FAST-NEXT: st.param.b64 [func_retval0], %rd5;
55+
; FAST-NEXT: ret;
56+
;
57+
; DEFAULT-LABEL: t1(
58+
; DEFAULT: {
59+
; DEFAULT-NEXT: .reg .b64 %rd<6>;
60+
; DEFAULT-EMPTY:
61+
; DEFAULT-NEXT: // %bb.0:
62+
; DEFAULT-NEXT: ld.param.b64 %rd1, [t1_param_1];
63+
; DEFAULT-NEXT: ld.param.b64 %rd2, [t1_param_0];
64+
; DEFAULT-NEXT: add.rn.f32x2 %rd3, %rd2, %rd1;
65+
; DEFAULT-NEXT: sub.rn.f32x2 %rd4, %rd2, %rd1;
66+
; DEFAULT-NEXT: mul.rn.f32x2 %rd5, %rd3, %rd4;
67+
; DEFAULT-NEXT: st.param.b64 [func_retval0], %rd5;
68+
; DEFAULT-NEXT: ret;
69+
%v1 = fadd <2 x float> %a, %b
70+
%v2 = fsub <2 x float> %a, %b
71+
%v3 = fmul <2 x float> %v1, %v2
72+
ret <2 x float> %v3
73+
}
74+
75+
;; Make sure we generate the non ".rn" version when the "contract" flag is
76+
;; present on the instructions
77+
;; Computes (a + b) * (a - b) with the contract flag on all three instructions.
;; One set of checks under the common prefix covers both RUN configurations, so
;; both are expected to emit the unsuffixed add.f32x2 / sub.f32x2 / mul.f32x2.
define <2 x float> @t2(<2 x float> %a, <2 x float> %b) {
78+
; CHECK-LABEL: t2(
79+
; CHECK: {
80+
; CHECK-NEXT: .reg .b64 %rd<6>;
81+
; CHECK-EMPTY:
82+
; CHECK-NEXT: // %bb.0:
83+
; CHECK-NEXT: ld.param.b64 %rd1, [t2_param_1];
84+
; CHECK-NEXT: ld.param.b64 %rd2, [t2_param_0];
85+
; CHECK-NEXT: add.f32x2 %rd3, %rd2, %rd1;
86+
; CHECK-NEXT: sub.f32x2 %rd4, %rd2, %rd1;
87+
; CHECK-NEXT: mul.f32x2 %rd5, %rd3, %rd4;
88+
; CHECK-NEXT: st.param.b64 [func_retval0], %rd5;
89+
; CHECK-NEXT: ret;
90+
%v1 = fadd contract <2 x float> %a, %b
91+
%v2 = fsub contract <2 x float> %a, %b
92+
%v3 = fmul contract <2 x float> %v1, %v2
93+
ret <2 x float> %v3
94+
}
95+
96+
;; Make sure we always fold to fma when the "contract" flag is present
97+
;; A contract-flagged fmul feeding a contract-flagged fadd. One set of checks
;; under the common prefix covers both RUN configurations, so a single
;; fma.rn.f32x2 is expected with or without -fp-contract=fast.
define <2 x float> @t3(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
98+
; CHECK-LABEL: t3(
99+
; CHECK: {
100+
; CHECK-NEXT: .reg .b64 %rd<5>;
101+
; CHECK-EMPTY:
102+
; CHECK-NEXT: // %bb.0:
103+
; CHECK-NEXT: ld.param.b64 %rd1, [t3_param_2];
104+
; CHECK-NEXT: ld.param.b64 %rd2, [t3_param_1];
105+
; CHECK-NEXT: ld.param.b64 %rd3, [t3_param_0];
106+
; CHECK-NEXT: fma.rn.f32x2 %rd4, %rd3, %rd2, %rd1;
107+
; CHECK-NEXT: st.param.b64 [func_retval0], %rd4;
108+
; CHECK-NEXT: ret;
109+
%v0 = fmul contract <2 x float> %a, %b
110+
%v1 = fadd contract <2 x float> %v0, %c
111+
ret <2 x float> %v1
112+
}

0 commit comments

Comments
 (0)