Skip to content

Commit 6baaa0a

Browse files
authored
[ARM] Handle roundeven for MVE. (#142557)
Now that #141786 handles scalar and NEON types, this adds MVE definitions and legalization for the llvm.roundeven intrinsics. Existing uses of the llvm.arm.mve.vrintn intrinsic are auto-upgraded to llvm.roundeven, like the other vrint instructions, so they should continue to work.
1 parent 718fd90 commit 6baaa0a

File tree

7 files changed

+83
-24
lines changed

7 files changed

+83
-24
lines changed

clang/include/clang/Basic/arm_mve.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -710,7 +710,7 @@ defm vrndmq: vrnd<IRIntBase<"floor", [Vector]>, "m">;
710710
defm vrndpq: vrnd<IRIntBase<"ceil", [Vector]>, "p">;
711711
defm vrndaq: vrnd<IRIntBase<"round", [Vector]>, "a">;
712712
defm vrndxq: vrnd<IRIntBase<"rint", [Vector]>, "x">;
713-
defm vrndnq: vrnd<IRInt<"vrintn", [Vector]>, "n">;
713+
defm vrndnq: vrnd<IRIntBase<"roundeven", [Vector]>, "n">;
714714

715715
multiclass compare_with_pred<string condname, dag arguments,
716716
dag cmp, string suffix> {

clang/test/CodeGen/arm-mve-intrinsics/vrnd.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ float32x4_t test_vrndxq_f32(float32x4_t a)
148148

149149
// CHECK-LABEL: @test_vrndnq_f16(
150150
// CHECK-NEXT: entry:
151-
// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vrintn.v8f16(<8 x half> [[A:%.*]])
151+
// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.roundeven.v8f16(<8 x half> [[A:%.*]])
152152
// CHECK-NEXT: ret <8 x half> [[TMP0]]
153153
//
154154
float16x8_t test_vrndnq_f16(float16x8_t a)
@@ -162,7 +162,7 @@ float16x8_t test_vrndnq_f16(float16x8_t a)
162162

163163
// CHECK-LABEL: @test_vrndnq_f32(
164164
// CHECK-NEXT: entry:
165-
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vrintn.v4f32(<4 x float> [[A:%.*]])
165+
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.roundeven.v4f32(<4 x float> [[A:%.*]])
166166
// CHECK-NEXT: ret <4 x float> [[TMP0]]
167167
//
168168
float32x4_t test_vrndnq_f32(float32x4_t a)

llvm/include/llvm/IR/IntrinsicsARM.td

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1306,8 +1306,6 @@ foreach suffix = ["a","n","p","m"] in {
13061306
[llvm_anyvector_ty /* input */], LLVMMatchType<0>, llvm_anyvector_ty>;
13071307
}
13081308

1309-
def int_arm_mve_vrintn: DefaultAttrsIntrinsic<
1310-
[llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
13111309
def int_arm_mve_vcls: DefaultAttrsIntrinsic<
13121310
[llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
13131311

llvm/lib/IR/AutoUpgrade.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -767,6 +767,12 @@ static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
767767
return false; // Not 'arm.mve.vctp64'.
768768
}
769769

770+
if (Name.starts_with("vrintn.v")) {
771+
NewFn = Intrinsic::getOrInsertDeclaration(
772+
F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
773+
return true;
774+
}
775+
770776
// These too are changed to accept a v2i1 instead of the old v4i1.
771777
if (Name.consume_back(".v4i1")) {
772778
// 'arm.mve.*.v4i1'.

llvm/lib/Target/ARM/ARMInstrMVE.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3527,7 +3527,7 @@ multiclass MVE_VRINT_m<MVEVectorVTInfo VTI, string suffix, bits<3> opcode,
35273527
}
35283528

35293529
multiclass MVE_VRINT_ops<MVEVectorVTInfo VTI> {
3530-
defm N : MVE_VRINT_m<VTI, "n", 0b000, int_arm_mve_vrintn>;
3530+
defm N : MVE_VRINT_m<VTI, "n", 0b000, froundeven>;
35313531
defm X : MVE_VRINT_m<VTI, "x", 0b001, frint>;
35323532
defm A : MVE_VRINT_m<VTI, "a", 0b010, fround>;
35333533
defm Z : MVE_VRINT_m<VTI, "z", 0b011, ftrunc>;

llvm/test/CodeGen/Thumb2/mve-frint.ll

Lines changed: 71 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -424,21 +424,74 @@ entry:
424424
ret <2 x double> %0
425425
}
426426

427-
declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
428-
declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
429-
declare <4 x float> @llvm.rint.v4f32(<4 x float>)
430-
declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)
431-
declare <4 x float> @llvm.floor.v4f32(<4 x float>)
432-
declare <4 x float> @llvm.round.v4f32(<4 x float>)
433-
declare <8 x half> @llvm.ceil.v8f16(<8 x half>)
434-
declare <8 x half> @llvm.trunc.v8f16(<8 x half>)
435-
declare <8 x half> @llvm.rint.v8f16(<8 x half>)
436-
declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>)
437-
declare <8 x half> @llvm.floor.v8f16(<8 x half>)
438-
declare <8 x half> @llvm.round.v8f16(<8 x half>)
439-
declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
440-
declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
441-
declare <2 x double> @llvm.rint.v2f64(<2 x double>)
442-
declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
443-
declare <2 x double> @llvm.floor.v2f64(<2 x double>)
444-
declare <2 x double> @llvm.round.v2f64(<2 x double>)
427+
define arm_aapcs_vfpcc <4 x float> @froundeven_float32_t(<4 x float> %src) {
428+
; CHECK-MVE-LABEL: froundeven_float32_t:
429+
; CHECK-MVE: @ %bb.0: @ %entry
430+
; CHECK-MVE-NEXT: vrintn.f32 s3, s3
431+
; CHECK-MVE-NEXT: vrintn.f32 s2, s2
432+
; CHECK-MVE-NEXT: vrintn.f32 s1, s1
433+
; CHECK-MVE-NEXT: vrintn.f32 s0, s0
434+
; CHECK-MVE-NEXT: bx lr
435+
;
436+
; CHECK-MVEFP-LABEL: froundeven_float32_t:
437+
; CHECK-MVEFP: @ %bb.0: @ %entry
438+
; CHECK-MVEFP-NEXT: vrintn.f32 q0, q0
439+
; CHECK-MVEFP-NEXT: bx lr
440+
entry:
441+
%0 = call fast <4 x float> @llvm.roundeven.v4f32(<4 x float> %src)
442+
ret <4 x float> %0
443+
}
444+
445+
define arm_aapcs_vfpcc <8 x half> @froundeven_float16_t(<8 x half> %src) {
446+
; CHECK-MVE-LABEL: froundeven_float16_t:
447+
; CHECK-MVE: @ %bb.0: @ %entry
448+
; CHECK-MVE-NEXT: vmovx.f16 s4, s0
449+
; CHECK-MVE-NEXT: vrintn.f16 s0, s0
450+
; CHECK-MVE-NEXT: vrintn.f16 s4, s4
451+
; CHECK-MVE-NEXT: vins.f16 s0, s4
452+
; CHECK-MVE-NEXT: vmovx.f16 s4, s1
453+
; CHECK-MVE-NEXT: vrintn.f16 s4, s4
454+
; CHECK-MVE-NEXT: vrintn.f16 s1, s1
455+
; CHECK-MVE-NEXT: vins.f16 s1, s4
456+
; CHECK-MVE-NEXT: vmovx.f16 s4, s2
457+
; CHECK-MVE-NEXT: vrintn.f16 s4, s4
458+
; CHECK-MVE-NEXT: vrintn.f16 s2, s2
459+
; CHECK-MVE-NEXT: vins.f16 s2, s4
460+
; CHECK-MVE-NEXT: vmovx.f16 s4, s3
461+
; CHECK-MVE-NEXT: vrintn.f16 s4, s4
462+
; CHECK-MVE-NEXT: vrintn.f16 s3, s3
463+
; CHECK-MVE-NEXT: vins.f16 s3, s4
464+
; CHECK-MVE-NEXT: bx lr
465+
;
466+
; CHECK-MVEFP-LABEL: froundeven_float16_t:
467+
; CHECK-MVEFP: @ %bb.0: @ %entry
468+
; CHECK-MVEFP-NEXT: vrintn.f16 q0, q0
469+
; CHECK-MVEFP-NEXT: bx lr
470+
entry:
471+
%0 = call fast <8 x half> @llvm.roundeven.v8f16(<8 x half> %src)
472+
ret <8 x half> %0
473+
}
474+
475+
define arm_aapcs_vfpcc <2 x double> @froundeven_float64_t(<2 x double> %src) {
476+
; CHECK-LABEL: froundeven_float64_t:
477+
; CHECK: @ %bb.0: @ %entry
478+
; CHECK-NEXT: .save {r7, lr}
479+
; CHECK-NEXT: push {r7, lr}
480+
; CHECK-NEXT: .vsave {d8, d9}
481+
; CHECK-NEXT: vpush {d8, d9}
482+
; CHECK-NEXT: vmov q4, q0
483+
; CHECK-NEXT: vmov r0, r1, d9
484+
; CHECK-NEXT: bl roundeven
485+
; CHECK-NEXT: vmov r2, r3, d8
486+
; CHECK-NEXT: vmov d9, r0, r1
487+
; CHECK-NEXT: mov r0, r2
488+
; CHECK-NEXT: mov r1, r3
489+
; CHECK-NEXT: bl roundeven
490+
; CHECK-NEXT: vmov d8, r0, r1
491+
; CHECK-NEXT: vmov q0, q4
492+
; CHECK-NEXT: vpop {d8, d9}
493+
; CHECK-NEXT: pop {r7, pc}
494+
entry:
495+
%0 = call fast <2 x double> @llvm.roundeven.v2f64(<2 x double> %src)
496+
ret <2 x double> %0
497+
}

llvm/test/CodeGen/Thumb2/mve-intrinsics/vrintn.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
33

4+
; The llvm.arm.mve.vrintn intrinsics should auto-upgrade to llvm.roundeven, which is selected to vrintn.
5+
46
define arm_aapcs_vfpcc <8 x half> @test_vrndnq_f16(<8 x half> %a) {
57
; CHECK-LABEL: test_vrndnq_f16:
68
; CHECK: @ %bb.0: @ %entry

0 commit comments

Comments
 (0)