From 8e2a8c203a2821288d543da119f48751b3c8ad02 Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Mon, 10 Jun 2024 11:06:14 +0000 Subject: [PATCH 1/2] [AArch64] Disable red-zone when lowering Q-reg copy through memory. This was pointed out in PR #93940. --- llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index cd532671f5018..65e3bbf4e9c35 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -431,8 +431,15 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); uint64_t NumBytes = AFI->getLocalStackSize(); + // If neither NEON nor SVE is available, a COPY from one Q-reg to + // another requires a spill -> reload sequence. We can do that + // using a pre-decrementing store/post-incrementing load, but + // if we do so, we can't use the Red Zone. 
+ bool LowerQRegCopyThroughMem = + !Subtarget.isNeonAvailable() && !Subtarget.hasSVE(); + return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize || - getSVEStackSize(MF)); + getSVEStackSize(MF) || LowerQRegCopyThroughMem); } /// hasFP - Return true if the specified function should have a dedicated frame From d4f576a1b5497eb108bf9104447fc3d34e8ccb9a Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Mon, 10 Jun 2024 15:58:42 +0000 Subject: [PATCH 2/2] Add hasFPARMv8 to avoid impact on soft-fp code --- llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 5 +++-- llvm/test/CodeGen/AArch64/arm64-redzone.ll | 13 +++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 65e3bbf4e9c35..cf617c7e92a70 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -435,8 +435,9 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { // another requires a spill -> reload sequence. We can do that // using a pre-decrementing store/post-incrementing load, but // if we do so, we can't use the Red Zone. 
- bool LowerQRegCopyThroughMem = - !Subtarget.isNeonAvailable() && !Subtarget.hasSVE(); + bool LowerQRegCopyThroughMem = Subtarget.hasFPARMv8() && + !Subtarget.isNeonAvailable() && + !Subtarget.hasSVE(); return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize || getSVEStackSize(MF) || LowerQRegCopyThroughMem); diff --git a/llvm/test/CodeGen/AArch64/arm64-redzone.ll b/llvm/test/CodeGen/AArch64/arm64-redzone.ll index fe30a1a98521e..d001bc2a8dbe4 100644 --- a/llvm/test/CodeGen/AArch64/arm64-redzone.ll +++ b/llvm/test/CodeGen/AArch64/arm64-redzone.ll @@ -16,3 +16,16 @@ define i32 @foo(i32 %a, i32 %b) nounwind ssp { %tmp2 = load i32, ptr %x, align 4 ret i32 %tmp2 } + +; We disable red-zone if NEON is not available because copies of Q-regs +; require a spill/fill and dynamic allocation. But we only need to do +; this when FP registers are enabled. +define void @bar(fp128 %f) "target-features"="-fp-armv8" { +; CHECK-LABEL: bar: +; CHECK: // %bb.0: +; CHECK-NEXT: stp x0, x1, [sp, #-16] +; CHECK-NEXT: ret + %ptr = alloca fp128 + store fp128 %f, ptr %ptr + ret void +}