Skip to content

Commit 76c9bfe

Browse files
authored
[NVPTX] Remove Float register classes (#140487)
The Float32Regs and Float64Regs register classes are redundant, because the untyped "Int" register classes (Int32Regs and Int64Regs) can be used for all float operations. This change is intentionally kept as minimal as possible; the many simplifications and refactors it makes possible are left as future work.
1 parent 3ce74c3 commit 76c9bfe

File tree

96 files changed

+9137
-7314
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

96 files changed

+9137
-7314
lines changed

llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -223,10 +223,6 @@ unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
223223
Ret = (3 << 28);
224224
} else if (RC == &NVPTX::Int64RegsRegClass) {
225225
Ret = (4 << 28);
226-
} else if (RC == &NVPTX::Float32RegsRegClass) {
227-
Ret = (5 << 28);
228-
} else if (RC == &NVPTX::Float64RegsRegClass) {
229-
Ret = (6 << 28);
230226
} else if (RC == &NVPTX::Int128RegsRegClass) {
231227
Ret = (7 << 28);
232228
} else {

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -606,8 +606,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
606606
addRegisterClass(MVT::v4i8, &NVPTX::Int32RegsRegClass);
607607
addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
608608
addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
609-
addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
610-
addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
609+
addRegisterClass(MVT::f32, &NVPTX::Int32RegsRegClass);
610+
addRegisterClass(MVT::f64, &NVPTX::Int64RegsRegClass);
611611
addRegisterClass(MVT::f16, &NVPTX::Int16RegsRegClass);
612612
addRegisterClass(MVT::v2f16, &NVPTX::Int32RegsRegClass);
613613
addRegisterClass(MVT::bf16, &NVPTX::Int16RegsRegClass);
@@ -4992,24 +4992,21 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
49924992
case 'b':
49934993
return std::make_pair(0U, &NVPTX::Int1RegsRegClass);
49944994
case 'c':
4995-
return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
49964995
case 'h':
49974996
return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
49984997
case 'r':
4998+
case 'f':
49994999
return std::make_pair(0U, &NVPTX::Int32RegsRegClass);
50005000
case 'l':
50015001
case 'N':
5002+
case 'd':
50025003
return std::make_pair(0U, &NVPTX::Int64RegsRegClass);
50035004
case 'q': {
50045005
if (STI.getSmVersion() < 70)
50055006
report_fatal_error("Inline asm with 128 bit operands is only "
50065007
"supported for sm_70 and higher!");
50075008
return std::make_pair(0U, &NVPTX::Int128RegsRegClass);
50085009
}
5009-
case 'f':
5010-
return std::make_pair(0U, &NVPTX::Float32RegsRegClass);
5011-
case 'd':
5012-
return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
50135010
}
50145011
}
50155012
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);

llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -44,19 +44,11 @@ void NVPTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
4444
} else if (DestRC == &NVPTX::Int16RegsRegClass) {
4545
Op = NVPTX::MOV16r;
4646
} else if (DestRC == &NVPTX::Int32RegsRegClass) {
47-
Op = (SrcRC == &NVPTX::Int32RegsRegClass ? NVPTX::IMOV32r
48-
: NVPTX::BITCONVERT_32_F2I);
47+
Op = NVPTX::IMOV32r;
4948
} else if (DestRC == &NVPTX::Int64RegsRegClass) {
50-
Op = (SrcRC == &NVPTX::Int64RegsRegClass ? NVPTX::IMOV64r
51-
: NVPTX::BITCONVERT_64_F2I);
49+
Op = NVPTX::IMOV64r;
5250
} else if (DestRC == &NVPTX::Int128RegsRegClass) {
5351
Op = NVPTX::IMOV128r;
54-
} else if (DestRC == &NVPTX::Float32RegsRegClass) {
55-
Op = (SrcRC == &NVPTX::Float32RegsRegClass ? NVPTX::FMOV32r
56-
: NVPTX::BITCONVERT_32_I2F);
57-
} else if (DestRC == &NVPTX::Float64RegsRegClass) {
58-
Op = (SrcRC == &NVPTX::Float64RegsRegClass ? NVPTX::FMOV64r
59-
: NVPTX::BITCONVERT_64_I2F);
6052
} else {
6153
llvm_unreachable("Bad register copy");
6254
}

llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,6 @@ using namespace llvm;
2525

2626
namespace llvm {
2727
StringRef getNVPTXRegClassName(TargetRegisterClass const *RC) {
28-
if (RC == &NVPTX::Float32RegsRegClass)
29-
return ".b32";
30-
if (RC == &NVPTX::Float64RegsRegClass)
31-
return ".b64";
3228
if (RC == &NVPTX::Int128RegsRegClass)
3329
return ".b128";
3430
if (RC == &NVPTX::Int64RegsRegClass)
@@ -63,10 +59,6 @@ StringRef getNVPTXRegClassName(TargetRegisterClass const *RC) {
6359
}
6460

6561
StringRef getNVPTXRegClassStr(TargetRegisterClass const *RC) {
66-
if (RC == &NVPTX::Float32RegsRegClass)
67-
return "%f";
68-
if (RC == &NVPTX::Float64RegsRegClass)
69-
return "%fd";
7062
if (RC == &NVPTX::Int128RegsRegClass)
7163
return "%rq";
7264
if (RC == &NVPTX::Int64RegsRegClass)

llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,6 @@ foreach i = 0...4 in {
4040
def RQ#i : NVPTXReg<"%rq"#i>; // 128-bit
4141
def H#i : NVPTXReg<"%h"#i>; // 16-bit float
4242
def HH#i : NVPTXReg<"%hh"#i>; // 2x16-bit float
43-
def F#i : NVPTXReg<"%f"#i>; // 32-bit float
44-
def FL#i : NVPTXReg<"%fd"#i>; // 64-bit float
4543

4644
// Arguments
4745
def ia#i : NVPTXReg<"%ia"#i>;
@@ -59,14 +57,13 @@ foreach i = 0...31 in {
5957
//===----------------------------------------------------------------------===//
6058
def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%u", 0, 4))>;
6159
def Int16Regs : NVPTXRegClass<[i16, f16, bf16], 16, (add (sequence "RS%u", 0, 4))>;
62-
def Int32Regs : NVPTXRegClass<[i32, v2f16, v2bf16, v2i16, v4i8], 32,
60+
def Int32Regs : NVPTXRegClass<[i32, v2f16, v2bf16, v2i16, v4i8, f32], 32,
6361
(add (sequence "R%u", 0, 4),
6462
VRFrame32, VRFrameLocal32)>;
65-
def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%u", 0, 4), VRFrame64, VRFrameLocal64)>;
63+
def Int64Regs : NVPTXRegClass<[i64, f64], 64, (add (sequence "RL%u", 0, 4), VRFrame64, VRFrameLocal64)>;
6664
// 128-bit regs are not defined as general regs in NVPTX. They are used for inlineASM only.
6765
def Int128Regs : NVPTXRegClass<[i128], 128, (add (sequence "RQ%u", 0, 4))>;
68-
def Float32Regs : NVPTXRegClass<[f32], 32, (add (sequence "F%u", 0, 4))>;
69-
def Float64Regs : NVPTXRegClass<[f64], 64, (add (sequence "FL%u", 0, 4))>;
66+
7067
def Int32ArgRegs : NVPTXRegClass<[i32], 32, (add (sequence "ia%u", 0, 4))>;
7168
def Int64ArgRegs : NVPTXRegClass<[i64], 64, (add (sequence "la%u", 0, 4))>;
7269
def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%u", 0, 4))>;
@@ -75,3 +72,6 @@ def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%u", 0, 4))>;
7572
// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
7673
def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame32, VRFrameLocal32, VRDepot,
7774
(sequence "ENVREG%u", 0, 31))>;
75+
76+
defvar Float32Regs = Int32Regs;
77+
defvar Float64Regs = Int64Regs;

llvm/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
---
1313
name: test
1414
registers:
15-
- { id: 0, class: float32regs }
16-
- { id: 1, class: float32regs }
15+
- { id: 0, class: int32regs }
16+
- { id: 1, class: int32regs }
1717
body: |
1818
bb.0.entry:
1919
%0 = LD_f32 0, 4, 1, 2, 32, &test_param_0, 0

llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -30,24 +30,24 @@
3030
---
3131
name: test
3232
registers:
33-
- { id: 0, class: float32regs }
34-
- { id: 1, class: float64regs }
33+
- { id: 0, class: int32regs }
34+
- { id: 1, class: int64regs }
3535
- { id: 2, class: int32regs }
36-
- { id: 3, class: float64regs }
37-
- { id: 4, class: float32regs }
38-
- { id: 5, class: float32regs }
39-
- { id: 6, class: float32regs }
40-
- { id: 7, class: float32regs }
36+
- { id: 3, class: int64regs }
37+
- { id: 4, class: int32regs }
38+
- { id: 5, class: int32regs }
39+
- { id: 6, class: int32regs }
40+
- { id: 7, class: int32regs }
4141
body: |
4242
bb.0.entry:
4343
%0 = LD_f32 0, 0, 4, 2, 32, &test_param_0, 0
4444
%1 = CVT_f64_f32 %0, 0
4545
%2 = LD_i32 0, 0, 4, 0, 32, &test_param_1, 0
46-
; CHECK: %3:float64regs = FADD_rnf64ri %1, double 3.250000e+00
46+
; CHECK: %3:int64regs = FADD_rnf64ri %1, double 3.250000e+00
4747
%3 = FADD_rnf64ri %1, double 3.250000e+00
4848
%4 = CVT_f32_f64 %3, 5
4949
%5 = CVT_f32_s32 %2, 5
50-
; CHECK: %6:float32regs = FADD_rnf32ri %5, float 6.250000e+00
50+
; CHECK: %6:int32regs = FADD_rnf32ri %5, float 6.250000e+00
5151
%6 = FADD_rnf32ri %5, float 6.250000e+00
5252
%7 = FMUL_rnf32rr %6, %4
5353
StoreRetvalF32 %7, 0
@@ -56,24 +56,24 @@ body: |
5656
---
5757
name: test2
5858
registers:
59-
- { id: 0, class: float32regs }
60-
- { id: 1, class: float64regs }
59+
- { id: 0, class: int32regs }
60+
- { id: 1, class: int64regs }
6161
- { id: 2, class: int32regs }
62-
- { id: 3, class: float64regs }
63-
- { id: 4, class: float32regs }
64-
- { id: 5, class: float32regs }
65-
- { id: 6, class: float32regs }
66-
- { id: 7, class: float32regs }
62+
- { id: 3, class: int64regs }
63+
- { id: 4, class: int32regs }
64+
- { id: 5, class: int32regs }
65+
- { id: 6, class: int32regs }
66+
- { id: 7, class: int32regs }
6767
body: |
6868
bb.0.entry:
6969
%0 = LD_f32 0, 0, 4, 2, 32, &test2_param_0, 0
7070
%1 = CVT_f64_f32 %0, 0
7171
%2 = LD_i32 0, 0, 4, 0, 32, &test2_param_1, 0
72-
; CHECK: %3:float64regs = FADD_rnf64ri %1, double 0x7FF8000000000000
72+
; CHECK: %3:int64regs = FADD_rnf64ri %1, double 0x7FF8000000000000
7373
%3 = FADD_rnf64ri %1, double 0x7FF8000000000000
7474
%4 = CVT_f32_f64 %3, 5
7575
%5 = CVT_f32_s32 %2, 5
76-
; CHECK: %6:float32regs = FADD_rnf32ri %5, float 0x7FF8000000000000
76+
; CHECK: %6:int32regs = FADD_rnf32ri %5, float 0x7FF8000000000000
7777
%6 = FADD_rnf32ri %5, float 0x7FF8000000000000
7878
%7 = FMUL_rnf32rr %6, %4
7979
StoreRetvalF32 %7, 0

llvm/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
---
1313
name: test
1414
registers:
15-
- { id: 0, class: float32regs }
16-
- { id: 1, class: float32regs }
15+
- { id: 0, class: int32regs }
16+
- { id: 1, class: int32regs }
1717
body: |
1818
bb.0.entry:
1919
%0 = LD_f32 0, 4, 1, 2, 32, &test_param_0, 0

llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll

Lines changed: 53 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -45,36 +45,36 @@ define half @fh(ptr %p) {
4545
; ENABLED-LABEL: fh(
4646
; ENABLED: {
4747
; ENABLED-NEXT: .reg .b16 %rs<10>;
48-
; ENABLED-NEXT: .reg .b32 %f<13>;
48+
; ENABLED-NEXT: .reg .b32 %r<13>;
4949
; ENABLED-NEXT: .reg .b64 %rd<2>;
5050
; ENABLED-EMPTY:
5151
; ENABLED-NEXT: // %bb.0:
5252
; ENABLED-NEXT: ld.param.b64 %rd1, [fh_param_0];
5353
; ENABLED-NEXT: ld.v4.b16 {%rs1, %rs2, %rs3, %rs4}, [%rd1];
5454
; ENABLED-NEXT: ld.b16 %rs5, [%rd1+8];
55-
; ENABLED-NEXT: cvt.f32.f16 %f1, %rs2;
56-
; ENABLED-NEXT: cvt.f32.f16 %f2, %rs1;
57-
; ENABLED-NEXT: add.rn.f32 %f3, %f2, %f1;
58-
; ENABLED-NEXT: cvt.rn.f16.f32 %rs6, %f3;
59-
; ENABLED-NEXT: cvt.f32.f16 %f4, %rs4;
60-
; ENABLED-NEXT: cvt.f32.f16 %f5, %rs3;
61-
; ENABLED-NEXT: add.rn.f32 %f6, %f5, %f4;
62-
; ENABLED-NEXT: cvt.rn.f16.f32 %rs7, %f6;
63-
; ENABLED-NEXT: cvt.f32.f16 %f7, %rs7;
64-
; ENABLED-NEXT: cvt.f32.f16 %f8, %rs6;
65-
; ENABLED-NEXT: add.rn.f32 %f9, %f8, %f7;
66-
; ENABLED-NEXT: cvt.rn.f16.f32 %rs8, %f9;
67-
; ENABLED-NEXT: cvt.f32.f16 %f10, %rs8;
68-
; ENABLED-NEXT: cvt.f32.f16 %f11, %rs5;
69-
; ENABLED-NEXT: add.rn.f32 %f12, %f10, %f11;
70-
; ENABLED-NEXT: cvt.rn.f16.f32 %rs9, %f12;
55+
; ENABLED-NEXT: cvt.f32.f16 %r1, %rs2;
56+
; ENABLED-NEXT: cvt.f32.f16 %r2, %rs1;
57+
; ENABLED-NEXT: add.rn.f32 %r3, %r2, %r1;
58+
; ENABLED-NEXT: cvt.rn.f16.f32 %rs6, %r3;
59+
; ENABLED-NEXT: cvt.f32.f16 %r4, %rs4;
60+
; ENABLED-NEXT: cvt.f32.f16 %r5, %rs3;
61+
; ENABLED-NEXT: add.rn.f32 %r6, %r5, %r4;
62+
; ENABLED-NEXT: cvt.rn.f16.f32 %rs7, %r6;
63+
; ENABLED-NEXT: cvt.f32.f16 %r7, %rs7;
64+
; ENABLED-NEXT: cvt.f32.f16 %r8, %rs6;
65+
; ENABLED-NEXT: add.rn.f32 %r9, %r8, %r7;
66+
; ENABLED-NEXT: cvt.rn.f16.f32 %rs8, %r9;
67+
; ENABLED-NEXT: cvt.f32.f16 %r10, %rs8;
68+
; ENABLED-NEXT: cvt.f32.f16 %r11, %rs5;
69+
; ENABLED-NEXT: add.rn.f32 %r12, %r10, %r11;
70+
; ENABLED-NEXT: cvt.rn.f16.f32 %rs9, %r12;
7171
; ENABLED-NEXT: st.param.b16 [func_retval0], %rs9;
7272
; ENABLED-NEXT: ret;
7373
;
7474
; DISABLED-LABEL: fh(
7575
; DISABLED: {
7676
; DISABLED-NEXT: .reg .b16 %rs<10>;
77-
; DISABLED-NEXT: .reg .b32 %f<13>;
77+
; DISABLED-NEXT: .reg .b32 %r<13>;
7878
; DISABLED-NEXT: .reg .b64 %rd<2>;
7979
; DISABLED-EMPTY:
8080
; DISABLED-NEXT: // %bb.0:
@@ -84,22 +84,22 @@ define half @fh(ptr %p) {
8484
; DISABLED-NEXT: ld.b16 %rs3, [%rd1+4];
8585
; DISABLED-NEXT: ld.b16 %rs4, [%rd1+6];
8686
; DISABLED-NEXT: ld.b16 %rs5, [%rd1+8];
87-
; DISABLED-NEXT: cvt.f32.f16 %f1, %rs2;
88-
; DISABLED-NEXT: cvt.f32.f16 %f2, %rs1;
89-
; DISABLED-NEXT: add.rn.f32 %f3, %f2, %f1;
90-
; DISABLED-NEXT: cvt.rn.f16.f32 %rs6, %f3;
91-
; DISABLED-NEXT: cvt.f32.f16 %f4, %rs4;
92-
; DISABLED-NEXT: cvt.f32.f16 %f5, %rs3;
93-
; DISABLED-NEXT: add.rn.f32 %f6, %f5, %f4;
94-
; DISABLED-NEXT: cvt.rn.f16.f32 %rs7, %f6;
95-
; DISABLED-NEXT: cvt.f32.f16 %f7, %rs7;
96-
; DISABLED-NEXT: cvt.f32.f16 %f8, %rs6;
97-
; DISABLED-NEXT: add.rn.f32 %f9, %f8, %f7;
98-
; DISABLED-NEXT: cvt.rn.f16.f32 %rs8, %f9;
99-
; DISABLED-NEXT: cvt.f32.f16 %f10, %rs8;
100-
; DISABLED-NEXT: cvt.f32.f16 %f11, %rs5;
101-
; DISABLED-NEXT: add.rn.f32 %f12, %f10, %f11;
102-
; DISABLED-NEXT: cvt.rn.f16.f32 %rs9, %f12;
87+
; DISABLED-NEXT: cvt.f32.f16 %r1, %rs2;
88+
; DISABLED-NEXT: cvt.f32.f16 %r2, %rs1;
89+
; DISABLED-NEXT: add.rn.f32 %r3, %r2, %r1;
90+
; DISABLED-NEXT: cvt.rn.f16.f32 %rs6, %r3;
91+
; DISABLED-NEXT: cvt.f32.f16 %r4, %rs4;
92+
; DISABLED-NEXT: cvt.f32.f16 %r5, %rs3;
93+
; DISABLED-NEXT: add.rn.f32 %r6, %r5, %r4;
94+
; DISABLED-NEXT: cvt.rn.f16.f32 %rs7, %r6;
95+
; DISABLED-NEXT: cvt.f32.f16 %r7, %rs7;
96+
; DISABLED-NEXT: cvt.f32.f16 %r8, %rs6;
97+
; DISABLED-NEXT: add.rn.f32 %r9, %r8, %r7;
98+
; DISABLED-NEXT: cvt.rn.f16.f32 %rs8, %r9;
99+
; DISABLED-NEXT: cvt.f32.f16 %r10, %rs8;
100+
; DISABLED-NEXT: cvt.f32.f16 %r11, %rs5;
101+
; DISABLED-NEXT: add.rn.f32 %r12, %r10, %r11;
102+
; DISABLED-NEXT: cvt.rn.f16.f32 %rs9, %r12;
103103
; DISABLED-NEXT: st.param.b16 [func_retval0], %rs9;
104104
; DISABLED-NEXT: ret;
105105
%p.1 = getelementptr half, ptr %p, i32 1
@@ -121,37 +121,37 @@ define half @fh(ptr %p) {
121121
define float @ff(ptr %p) {
122122
; ENABLED-LABEL: ff(
123123
; ENABLED: {
124-
; ENABLED-NEXT: .reg .b32 %f<10>;
124+
; ENABLED-NEXT: .reg .b32 %r<10>;
125125
; ENABLED-NEXT: .reg .b64 %rd<2>;
126126
; ENABLED-EMPTY:
127127
; ENABLED-NEXT: // %bb.0:
128128
; ENABLED-NEXT: ld.param.b64 %rd1, [ff_param_0];
129-
; ENABLED-NEXT: ld.v4.b32 {%f1, %f2, %f3, %f4}, [%rd1];
130-
; ENABLED-NEXT: ld.b32 %f5, [%rd1+16];
131-
; ENABLED-NEXT: add.rn.f32 %f6, %f1, %f2;
132-
; ENABLED-NEXT: add.rn.f32 %f7, %f3, %f4;
133-
; ENABLED-NEXT: add.rn.f32 %f8, %f6, %f7;
134-
; ENABLED-NEXT: add.rn.f32 %f9, %f8, %f5;
135-
; ENABLED-NEXT: st.param.b32 [func_retval0], %f9;
129+
; ENABLED-NEXT: ld.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
130+
; ENABLED-NEXT: ld.b32 %r5, [%rd1+16];
131+
; ENABLED-NEXT: add.rn.f32 %r6, %r1, %r2;
132+
; ENABLED-NEXT: add.rn.f32 %r7, %r3, %r4;
133+
; ENABLED-NEXT: add.rn.f32 %r8, %r6, %r7;
134+
; ENABLED-NEXT: add.rn.f32 %r9, %r8, %r5;
135+
; ENABLED-NEXT: st.param.b32 [func_retval0], %r9;
136136
; ENABLED-NEXT: ret;
137137
;
138138
; DISABLED-LABEL: ff(
139139
; DISABLED: {
140-
; DISABLED-NEXT: .reg .b32 %f<10>;
140+
; DISABLED-NEXT: .reg .b32 %r<10>;
141141
; DISABLED-NEXT: .reg .b64 %rd<2>;
142142
; DISABLED-EMPTY:
143143
; DISABLED-NEXT: // %bb.0:
144144
; DISABLED-NEXT: ld.param.b64 %rd1, [ff_param_0];
145-
; DISABLED-NEXT: ld.b32 %f1, [%rd1];
146-
; DISABLED-NEXT: ld.b32 %f2, [%rd1+4];
147-
; DISABLED-NEXT: ld.b32 %f3, [%rd1+8];
148-
; DISABLED-NEXT: ld.b32 %f4, [%rd1+12];
149-
; DISABLED-NEXT: ld.b32 %f5, [%rd1+16];
150-
; DISABLED-NEXT: add.rn.f32 %f6, %f1, %f2;
151-
; DISABLED-NEXT: add.rn.f32 %f7, %f3, %f4;
152-
; DISABLED-NEXT: add.rn.f32 %f8, %f6, %f7;
153-
; DISABLED-NEXT: add.rn.f32 %f9, %f8, %f5;
154-
; DISABLED-NEXT: st.param.b32 [func_retval0], %f9;
145+
; DISABLED-NEXT: ld.b32 %r1, [%rd1];
146+
; DISABLED-NEXT: ld.b32 %r2, [%rd1+4];
147+
; DISABLED-NEXT: ld.b32 %r3, [%rd1+8];
148+
; DISABLED-NEXT: ld.b32 %r4, [%rd1+12];
149+
; DISABLED-NEXT: ld.b32 %r5, [%rd1+16];
150+
; DISABLED-NEXT: add.rn.f32 %r6, %r1, %r2;
151+
; DISABLED-NEXT: add.rn.f32 %r7, %r3, %r4;
152+
; DISABLED-NEXT: add.rn.f32 %r8, %r6, %r7;
153+
; DISABLED-NEXT: add.rn.f32 %r9, %r8, %r5;
154+
; DISABLED-NEXT: st.param.b32 [func_retval0], %r9;
155155
; DISABLED-NEXT: ret;
156156
%p.1 = getelementptr float, ptr %p, i32 1
157157
%p.2 = getelementptr float, ptr %p, i32 2

0 commit comments

Comments
 (0)