diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index 9b23a5ab521c8..49d8aa04030b9 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -47,6 +47,7 @@ add_llvm_target(RISCVCodeGen
   RISCVISelDAGToDAG.cpp
   RISCVISelLowering.cpp
   RISCVLandingPadSetup.cpp
+  RISCVLoadStoreOptimizer.cpp
   RISCVMachineFunctionInfo.cpp
   RISCVMergeBaseOffset.cpp
   RISCVOptWInstrs.cpp
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 8f7db34561749..1e279c70018a3 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -2750,6 +2750,42 @@ MachineInstr *RISCVInstrInfo::emitLdStWithAddr(MachineInstr &MemI,
       .setMIFlags(MemI.getFlags());
 }
 
+// TODO: At the moment, only MIPS has introduced pairing of instructions
+// operating on words or double words. This should be extended with more
+// instructions when more vendors support load/store pairing.
+bool RISCVInstrInfo::isPairableLdStInstOpc(unsigned Opc) {
+  switch (Opc) {
+  default:
+    return false;
+  case RISCV::SW:
+  case RISCV::SD:
+  case RISCV::LD:
+  case RISCV::LW:
+    return true;
+  }
+}
+
+bool RISCVInstrInfo::isLdStSafeToPair(const MachineInstr &LdSt,
+                                      const TargetRegisterInfo *TRI) {
+  // If this is a volatile load/store, don't mess with it.
+  if (LdSt.hasOrderedMemoryRef() || LdSt.getNumExplicitOperands() != 3)
+    return false;
+
+  if (LdSt.getOperand(1).isFI())
+    return true;
+
+  assert(LdSt.getOperand(1).isReg() && "Expected a reg operand.");
+  // Can't cluster if the instruction modifies the base register
+  // or is in update form, e.g. ld x5, 8(x5).
+  if (LdSt.modifiesRegister(LdSt.getOperand(1).getReg(), TRI))
+    return false;
+
+  if (!LdSt.getOperand(2).isImm())
+    return false;
+
+  return true;
+}
+
 bool RISCVInstrInfo::getMemOperandsWithOffsetWidth(
     const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
     int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index afbc8df50b452..7e2bb7259ae5c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -302,6 +302,12 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
 
   bool isHighLatencyDef(int Opc) const override;
 
+  /// Return true if the given load or store opcode may be paired with another.
+  static bool isPairableLdStInstOpc(unsigned Opc);
+
+  static bool isLdStSafeToPair(const MachineInstr &LdSt,
+                               const TargetRegisterInfo *TRI);
+
 protected:
   const RISCVSubtarget &STI;
 
diff --git a/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
new file mode 100644
index 0000000000000..46fd3158e07ca
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
@@ -0,0 +1,403 @@
+//===----- RISCVLoadStoreOptimizer.cpp ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Load/Store Pairing: This pass identifies pairs of load or store instructions
+// operating on consecutive memory locations and merges them into a single
+// paired instruction, leveraging hardware support for paired memory accesses.
+// Much of the pairing logic is adapted from the AArch64LoadStoreOpt pass.
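+//
+// As a rough illustration (register names and offsets are only for exposition,
+// assuming the Xmipslsp vendor extension and this pass are enabled), two
+// adjacent word loads from consecutive addresses such as
+//   lw a1, 0(a0)
+//   lw a2, 4(a0)
+// may be rewritten into the single paired access
+//   mips.lwp a1, a2, 0(a0)
+// provided the base address is sufficiently aligned and the pairing is
+// otherwise safe to perform.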
+//
+// NOTE: The AArch64LoadStoreOpt pass performs additional optimizations such as
+// merging zero store instructions, promoting loads that read directly from a
+// preceding store, and merging base register updates with load/store
+// instructions (via pre-/post-indexed addressing). These advanced
+// transformations are not yet implemented in the RISC-V pass but represent
+// potential future enhancements for further optimizing RISC-V memory
+// operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVTargetMachine.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-load-store-opt"
+#define RISCV_LOAD_STORE_OPT_NAME "RISC-V Load / Store Optimizer"
+
+// LdStLimit limits how many instructions we search through when looking for a
+// load/store pair.
+static cl::opt<unsigned> LdStLimit("riscv-load-store-scan-limit", cl::init(128),
+                                   cl::Hidden);
+
+namespace {
+
+struct RISCVLoadStoreOpt : public MachineFunctionPass {
+  static char ID;
+  bool runOnMachineFunction(MachineFunction &Fn) override;
+
+  RISCVLoadStoreOpt() : MachineFunctionPass(ID) {}
+
+  MachineFunctionProperties getRequiredProperties() const override {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::NoVRegs);
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<AAResultsWrapperPass>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  StringRef getPassName() const override { return RISCV_LOAD_STORE_OPT_NAME; }
+
+  // Find and pair load/store instructions.
+  bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);
+
+  // Convert load/store pairs to single instructions.
+  bool tryConvertToLdStPair(MachineBasicBlock::iterator First,
+                            MachineBasicBlock::iterator Second);
+
+  // Scan the instructions looking for a load/store that can be combined
+  // with the current instruction into a load/store pair.
+  // Return the matching instruction if one is found, else MBB->end().
+  MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
+                                               bool &MergeForward);
+
+  MachineBasicBlock::iterator
+  mergePairedInsns(MachineBasicBlock::iterator I,
+                   MachineBasicBlock::iterator Paired, bool MergeForward);
+
+private:
+  AliasAnalysis *AA;
+  MachineRegisterInfo *MRI;
+  const RISCVInstrInfo *TII;
+  const RISCVRegisterInfo *TRI;
+  LiveRegUnits ModifiedRegUnits, UsedRegUnits;
+};
+} // end anonymous namespace
+
+char RISCVLoadStoreOpt::ID = 0;
+INITIALIZE_PASS(RISCVLoadStoreOpt, DEBUG_TYPE, RISCV_LOAD_STORE_OPT_NAME, false,
+                false)
+
+bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+  if (skipFunction(Fn.getFunction()))
+    return false;
+  const RISCVSubtarget &Subtarget = Fn.getSubtarget<RISCVSubtarget>();
+  if (!Subtarget.useLoadStorePairs())
+    return false;
+
+  bool MadeChange = false;
+  TII = Subtarget.getInstrInfo();
+  TRI = Subtarget.getRegisterInfo();
+  MRI = &Fn.getRegInfo();
+  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+  ModifiedRegUnits.init(*TRI);
+  UsedRegUnits.init(*TRI);
+
+  for (MachineBasicBlock &MBB : Fn) {
+    LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
+
+    for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+         MBBI != E;) {
+      if (TII->isPairableLdStInstOpc(MBBI->getOpcode()) &&
+          tryToPairLdStInst(MBBI))
+        MadeChange = true;
+      else
+        ++MBBI;
+    }
+  }
+  return MadeChange;
+}
+
+// Find loads and stores that can be merged into a single load or store pair
+// instruction.
+bool RISCVLoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
+  MachineInstr &MI = *MBBI;
+
+  // If this is volatile, it is not a candidate.
+  if (MI.hasOrderedMemoryRef())
+    return false;
+
+  if (!TII->isLdStSafeToPair(MI, TRI))
+    return false;
+
+  // Look ahead for a pairable instruction.
+  MachineBasicBlock::iterator E = MI.getParent()->end();
+  bool MergeForward;
+  MachineBasicBlock::iterator Paired = findMatchingInsn(MBBI, MergeForward);
+  if (Paired != E) {
+    MBBI = mergePairedInsns(MBBI, Paired, MergeForward);
+    return true;
+  }
+  return false;
+}
+
+// Merge two adjacent load/store instructions into a paired instruction
+// (LDP/SDP/SWP/LWP) if the effective address is 8-byte aligned in the case of
+// SWP/LWP and 16-byte aligned in the case of LDP/SDP. This function selects
+// the appropriate paired opcode, verifies that the memory operand is properly
+// aligned, and checks that the offset is valid. If all conditions are met, it
+// builds and inserts the paired instruction.
+bool RISCVLoadStoreOpt::tryConvertToLdStPair(
+    MachineBasicBlock::iterator First, MachineBasicBlock::iterator Second) {
+  unsigned PairOpc;
+  Align RequiredAlignment;
+  switch (First->getOpcode()) {
+  default:
+    llvm_unreachable("Unsupported load/store instruction for pairing");
+  case RISCV::SW:
+    PairOpc = RISCV::MIPS_SWP;
+    RequiredAlignment = Align(8);
+    break;
+  case RISCV::LW:
+    PairOpc = RISCV::MIPS_LWP;
+    RequiredAlignment = Align(8);
+    break;
+  case RISCV::SD:
+    PairOpc = RISCV::MIPS_SDP;
+    RequiredAlignment = Align(16);
+    break;
+  case RISCV::LD:
+    PairOpc = RISCV::MIPS_LDP;
+    RequiredAlignment = Align(16);
+    break;
+  }
+
+  MachineFunction *MF = First->getMF();
+  const MachineMemOperand *MMO = *First->memoperands_begin();
+  Align MMOAlign = MMO->getAlign();
+
+  if (MMOAlign < RequiredAlignment)
+    return false;
+
+  int64_t Offset = First->getOperand(2).getImm();
+  if (!isUInt<7>(Offset))
+    return false;
+
+  MachineInstrBuilder MIB = BuildMI(
+      *MF,
+      First->getDebugLoc().get() ? First->getDebugLoc() : Second->getDebugLoc(),
+      TII->get(PairOpc));
+  MIB.add(First->getOperand(0))
+      .add(Second->getOperand(0))
+      .add(First->getOperand(1))
+      .add(First->getOperand(2))
+      .cloneMergedMemRefs({&*First, &*Second});
+
+  First->getParent()->insert(First, MIB);
+
+  First->removeFromParent();
+  Second->removeFromParent();
+
+  return true;
+}
+
+static bool mayAlias(MachineInstr &MIa,
+                     SmallVectorImpl<MachineInstr *> &MemInsns,
+                     AliasAnalysis *AA) {
+  for (MachineInstr *MIb : MemInsns)
+    if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false))
+      return true;
+
+  return false;
+}
+
+// Scan the instructions looking for a load/store that can be combined with the
+// current instruction into a wider equivalent or a load/store pair.
+// TODO: Extend pairing logic to consider reordering both instructions
+// to a safe "middle" position rather than only merging forward/backward.
+// This requires more sophisticated checks for aliasing, register
+// liveness, and potential scheduling hazards.
+MachineBasicBlock::iterator
+RISCVLoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
+                                    bool &MergeForward) {
+  MachineBasicBlock::iterator E = I->getParent()->end();
+  MachineBasicBlock::iterator MBBI = I;
+  MachineInstr &FirstMI = *I;
+  MBBI = next_nodbg(MBBI, E);
+
+  bool MayLoad = FirstMI.mayLoad();
+  Register Reg = FirstMI.getOperand(0).getReg();
+  Register BaseReg = FirstMI.getOperand(1).getReg();
+  int64_t Offset = FirstMI.getOperand(2).getImm();
+  int64_t OffsetStride = (*FirstMI.memoperands_begin())->getSize().getValue();
+
+  MergeForward = false;
+
+  // Track which register units have been modified and used between the first
+  // insn (inclusive) and the second insn.
+  ModifiedRegUnits.clear();
+  UsedRegUnits.clear();
+
+  // Remember any instructions that read/write memory between FirstMI and MI.
+  SmallVector<MachineInstr *, 4> MemInsns;
+
+  for (unsigned Count = 0; MBBI != E && Count < LdStLimit;
+       MBBI = next_nodbg(MBBI, E)) {
+    MachineInstr &MI = *MBBI;
+
+    // Don't count transient instructions towards the search limit since there
+    // may be different numbers of them if e.g. debug information is present.
+    if (!MI.isTransient())
+      ++Count;
+
+    if (MI.getOpcode() == FirstMI.getOpcode() &&
+        TII->isLdStSafeToPair(MI, TRI)) {
+      Register MIBaseReg = MI.getOperand(1).getReg();
+      int64_t MIOffset = MI.getOperand(2).getImm();
+
+      if (BaseReg == MIBaseReg) {
+        if ((Offset != MIOffset + OffsetStride) &&
+            (Offset + OffsetStride != MIOffset)) {
+          LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
+                                            TRI);
+          MemInsns.push_back(&MI);
+          continue;
+        }
+
+        // If the destination register of one load is the same register or a
+        // sub/super register of the other load, bail and keep looking.
+        if (MayLoad &&
+            TRI->isSuperOrSubRegisterEq(Reg, MI.getOperand(0).getReg())) {
+          LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
+                                            TRI);
+          MemInsns.push_back(&MI);
+          continue;
+        }
+
+        // If the BaseReg has been modified, then we cannot do the
+        // optimization.
+        if (!ModifiedRegUnits.available(BaseReg))
+          return E;
+
+        // If the destination register (Rt) of the second instruction was not
+        // modified or used between the two instructions and none of the
+        // instructions between the second and first alias with the second, we
+        // can combine the second into the first.
+        if (ModifiedRegUnits.available(MI.getOperand(0).getReg()) &&
+            !(MI.mayLoad() &&
+              !UsedRegUnits.available(MI.getOperand(0).getReg())) &&
+            !mayAlias(MI, MemInsns, AA)) {
+
+          MergeForward = false;
+          return MBBI;
+        }
+
+        // Likewise, if the Rt of the first instruction is not modified or used
+        // between the two instructions and none of the instructions between
+        // the first and the second alias with the first, we can combine the
+        // first into the second.
+        if (!(MayLoad &&
+              !UsedRegUnits.available(FirstMI.getOperand(0).getReg())) &&
+            !mayAlias(FirstMI, MemInsns, AA)) {
+
+          if (ModifiedRegUnits.available(FirstMI.getOperand(0).getReg())) {
+            MergeForward = true;
+            return MBBI;
+          }
+        }
+        // Unable to combine these instructions due to interference in between.
+        // Keep looking.
+      }
+    }
+
+    // The instruction wasn't a matching load or store. Stop searching if we
+    // encounter a call instruction that might modify memory.
+    if (MI.isCall())
+      return E;
+
+    // Update modified / used register units.
+    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
+
+    // Otherwise, if the base register is modified, we have no match, so
+    // return early.
+    if (!ModifiedRegUnits.available(BaseReg))
+      return E;
+
+    // Update list of instructions that read/write memory.
+    if (MI.mayLoadOrStore())
+      MemInsns.push_back(&MI);
+  }
+  return E;
+}
+
+MachineBasicBlock::iterator
+RISCVLoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
+                                    MachineBasicBlock::iterator Paired,
+                                    bool MergeForward) {
+  MachineBasicBlock::iterator E = I->getParent()->end();
+  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
+  // If NextI is the second of the two instructions to be merged, we need
+  // to skip one further. Either way, the merge will invalidate the iterator,
+  // and we don't need to scan the new instruction, as it's a pairwise
+  // instruction, which we're not considering for further action anyway.
+  if (NextI == Paired)
+    NextI = next_nodbg(NextI, E);
+
+  // Insert our new paired instruction after whichever of the paired
+  // instructions MergeForward indicates.
+  MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
+  MachineBasicBlock::iterator DeletionPoint = MergeForward ? I : Paired;
+  int Offset = I->getOperand(2).getImm();
+  int PairedOffset = Paired->getOperand(2).getImm();
+  bool InsertAfter = (Offset < PairedOffset) ^ MergeForward;
+
+  if (!MergeForward)
+    Paired->getOperand(1).setIsKill(false);
+
+  // Kill flags may become invalid when moving stores for pairing.
+  if (I->getOperand(0).isUse()) {
+    if (!MergeForward) {
+      // Check if the Paired store's source register has a kill flag and clear
+      // it only if there are intermediate uses between I and Paired.
+      MachineOperand &PairedRegOp = Paired->getOperand(0);
+      if (PairedRegOp.isKill()) {
+        for (auto It = std::next(I); It != Paired; ++It) {
+          if (It->readsRegister(PairedRegOp.getReg(), TRI)) {
+            PairedRegOp.setIsKill(false);
+            break;
+          }
+        }
+      }
+    } else {
+      // Clear kill flags of the first store's register in the forward
+      // direction.
+      Register Reg = I->getOperand(0).getReg();
+      for (MachineInstr &MI : make_range(std::next(I), std::next(Paired)))
+        MI.clearRegisterKills(Reg, TRI);
+    }
+  }
+
+  MachineInstr *ToInsert = DeletionPoint->removeFromParent();
+  MachineBasicBlock &MBB = *InsertionPoint->getParent();
+  MachineBasicBlock::iterator First, Second;
+
+  if (!InsertAfter) {
+    First = MBB.insert(InsertionPoint, ToInsert);
+    Second = InsertionPoint;
+  } else {
+    Second = MBB.insertAfter(InsertionPoint, ToInsert);
+    First = InsertionPoint;
+  }
+
+  if (tryConvertToLdStPair(First, Second)) {
+    LLVM_DEBUG(dbgs() << "Pairing load/store:\n    ");
+    LLVM_DEBUG(prev_nodbg(NextI, MBB.begin())->print(dbgs()));
+  }
+
+  return NextI;
+}
+
+// Returns an instance of the Load / Store Optimization pass.
+FunctionPass *llvm::createRISCVLoadStoreOptPass() {
+  return new RISCVLoadStoreOpt();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index 1b54c278820fc..3c996c82fcec4 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -62,14 +62,14 @@ static cl::opt<unsigned> RISCVMinimumJumpTableEntries(
     "riscv-min-jump-table-entries", cl::Hidden,
     cl::desc("Set minimum number of entries to use a jump table on RISCV"));
 
-static cl::opt<bool>
-    UseMIPSLoadStorePairsOpt("mips-riscv-load-store-pairs",
-                             cl::desc("RISCV: Optimize for load-store bonding"),
-                             cl::init(false), cl::Hidden);
+static cl::opt<bool> UseMIPSLoadStorePairsOpt(
+    "use-riscv-mips-load-store-pairs",
+    cl::desc("Enable the load/store pair optimization pass"), cl::init(false),
+    cl::Hidden);
 
-static cl::opt<bool>
-    UseCCMovInsn("riscv-ccmov", cl::desc("RISCV: Use 'mips.ccmov' instruction"),
-                 cl::init(true), cl::Hidden);
+static cl::opt<bool> UseCCMovInsn("use-riscv-ccmov",
+                                  cl::desc("Use 'mips.ccmov' instruction"),
+                                  cl::init(true), cl::Hidden);
 
 void RISCVSubtarget::anchor() {}
 
@@ -248,6 +248,10 @@ void RISCVSubtarget::overridePostRASchedPolicy(MachineSchedPolicy &Policy,
   }
 }
 
+bool RISCVSubtarget::useLoadStorePairs() const {
+  return UseMIPSLoadStorePairsOpt && HasVendorXMIPSLSP;
+}
+
 bool RISCVSubtarget::useCCMovInsn() const {
   return UseCCMovInsn && HasVendorXMIPSCMove;
 }
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 167dbb53c5950..2711bb4750571 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -144,6 +144,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
   initializeRISCVDAGToDAGISelLegacyPass(*PR);
   initializeRISCVMoveMergePass(*PR);
   initializeRISCVPushPopOptPass(*PR);
+  initializeRISCVLoadStoreOptPass(*PR);
 }
 
 static StringRef computeDataLayout(const Triple &TT,
@@ -550,6 +551,8 @@ void RISCVPassConfig::addPreSched2() {
 
   // Emit KCFI checks for indirect calls.
addPass(createKCFIPass()); + if (TM->getOptLevel() != CodeGenOptLevel::None) + addPass(createRISCVLoadStoreOptPass()); } void RISCVPassConfig::addPreEmitPass() { diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index 2646dfeca4eb6..2224d82dbbb8d 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -181,6 +181,7 @@ ; CHECK-NEXT: Post-RA pseudo instruction expansion pass ; CHECK-NEXT: RISC-V post-regalloc pseudo instruction expansion pass ; CHECK-NEXT: Insert KCFI indirect call checks +; CHECK-NEXT: RISC-V Load / Store Optimizer ; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: Machine Natural Loop Construction ; CHECK-NEXT: PostRA Machine Instruction Scheduler diff --git a/llvm/test/CodeGen/RISCV/load-store-pair.ll b/llvm/test/CodeGen/RISCV/load-store-pair.ll new file mode 100644 index 0000000000000..4aad0a8a0d05f --- /dev/null +++ b/llvm/test/CodeGen/RISCV/load-store-pair.ll @@ -0,0 +1,326 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+d -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32D +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+d -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64D +; RUN: llc -mtriple=riscv32 -mattr=+Xmipslsp -use-riscv-mips-load-store-pairs=1 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I_PAIR +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+d,+Xmipslsp -use-riscv-mips-load-store-pairs=1 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32D_PAIR +; RUN: llc -mtriple=riscv64 -mattr=+Xmipslsp -use-riscv-mips-load-store-pairs=1 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I_PAIR +; RUN: llc -mtriple=riscv64 -mcpu=mips-p8700 -mattr=+Xmipslsp -use-riscv-mips-load-store-pairs=1 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64P_8700 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+d,+Xmipslsp -use-riscv-mips-load-store-pairs=1 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64D_PAIR +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+d -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64D_NOPAIR + +define void @testi(ptr %a) { +; RV32I-LABEL: testi: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw s2, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 0(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset s2, -4 +; RV32I-NEXT: .cfi_offset s3, -8 +; RV32I-NEXT: .cfi_offset s4, -12 +; RV32I-NEXT: .cfi_offset s5, -16 +; RV32I-NEXT: lw s3, 0(a0) +; RV32I-NEXT: lw s2, 4(a0) +; RV32I-NEXT: lw s5, 8(a0) +; RV32I-NEXT: lw s4, 12(a0) +; RV32I-NEXT: #APP +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: lw s2, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: .cfi_restore s2 +; RV32I-NEXT: .cfi_restore s3 +; RV32I-NEXT: .cfi_restore s4 +; RV32I-NEXT: .cfi_restore s5 +; RV32I-NEXT: addi sp, sp, 16 +; 
RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +; +; RV32D-LABEL: testi: +; RV32D: # %bb.0: # %entry +; RV32D-NEXT: addi sp, sp, -16 +; RV32D-NEXT: .cfi_def_cfa_offset 16 +; RV32D-NEXT: sw s2, 12(sp) # 4-byte Folded Spill +; RV32D-NEXT: sw s3, 8(sp) # 4-byte Folded Spill +; RV32D-NEXT: sw s4, 4(sp) # 4-byte Folded Spill +; RV32D-NEXT: sw s5, 0(sp) # 4-byte Folded Spill +; RV32D-NEXT: .cfi_offset s2, -4 +; RV32D-NEXT: .cfi_offset s3, -8 +; RV32D-NEXT: .cfi_offset s4, -12 +; RV32D-NEXT: .cfi_offset s5, -16 +; RV32D-NEXT: lw s3, 0(a0) +; RV32D-NEXT: lw s2, 4(a0) +; RV32D-NEXT: lw s5, 8(a0) +; RV32D-NEXT: lw s4, 12(a0) +; RV32D-NEXT: #APP +; RV32D-NEXT: #NO_APP +; RV32D-NEXT: lw s2, 12(sp) # 4-byte Folded Reload +; RV32D-NEXT: lw s3, 8(sp) # 4-byte Folded Reload +; RV32D-NEXT: lw s4, 4(sp) # 4-byte Folded Reload +; RV32D-NEXT: lw s5, 0(sp) # 4-byte Folded Reload +; RV32D-NEXT: .cfi_restore s2 +; RV32D-NEXT: .cfi_restore s3 +; RV32D-NEXT: .cfi_restore s4 +; RV32D-NEXT: .cfi_restore s5 +; RV32D-NEXT: addi sp, sp, 16 +; RV32D-NEXT: .cfi_def_cfa_offset 0 +; RV32D-NEXT: ret +; +; RV64I-LABEL: testi: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: .cfi_def_cfa_offset 32 +; RV64I-NEXT: sd s2, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s5, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset s2, -8 +; RV64I-NEXT: .cfi_offset s3, -16 +; RV64I-NEXT: .cfi_offset s4, -24 +; RV64I-NEXT: .cfi_offset s5, -32 +; RV64I-NEXT: ld s3, 0(a0) +; RV64I-NEXT: ld s2, 8(a0) +; RV64I-NEXT: ld s5, 16(a0) +; RV64I-NEXT: ld s4, 24(a0) +; RV64I-NEXT: #APP +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: ld s2, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s4, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s5, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: .cfi_restore s2 +; RV64I-NEXT: .cfi_restore s3 +; RV64I-NEXT: .cfi_restore s4 +; RV64I-NEXT: .cfi_restore s5 +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV64D-LABEL: testi: +; RV64D: # %bb.0: # %entry +; RV64D-NEXT: addi sp, sp, -32 +; RV64D-NEXT: .cfi_def_cfa_offset 32 +; RV64D-NEXT: sd s2, 24(sp) # 8-byte Folded Spill +; RV64D-NEXT: sd s3, 16(sp) # 8-byte Folded Spill +; RV64D-NEXT: sd s4, 8(sp) # 8-byte Folded Spill +; RV64D-NEXT: sd s5, 0(sp) # 8-byte Folded Spill +; RV64D-NEXT: .cfi_offset s2, -8 +; RV64D-NEXT: .cfi_offset s3, -16 +; RV64D-NEXT: .cfi_offset s4, -24 +; RV64D-NEXT: .cfi_offset s5, -32 +; RV64D-NEXT: ld s3, 0(a0) +; RV64D-NEXT: ld s2, 8(a0) +; RV64D-NEXT: ld s5, 16(a0) +; RV64D-NEXT: ld s4, 24(a0) +; RV64D-NEXT: #APP +; RV64D-NEXT: #NO_APP +; RV64D-NEXT: ld s2, 24(sp) # 8-byte Folded Reload +; RV64D-NEXT: ld s3, 16(sp) # 8-byte Folded Reload +; RV64D-NEXT: ld s4, 8(sp) # 8-byte Folded Reload +; RV64D-NEXT: ld s5, 0(sp) # 8-byte Folded Reload +; RV64D-NEXT: .cfi_restore s2 +; RV64D-NEXT: .cfi_restore s3 +; RV64D-NEXT: .cfi_restore s4 +; RV64D-NEXT: .cfi_restore s5 +; RV64D-NEXT: addi sp, sp, 32 +; RV64D-NEXT: .cfi_def_cfa_offset 0 +; RV64D-NEXT: ret +; +; RV32I_PAIR-LABEL: testi: +; RV32I_PAIR: # %bb.0: # %entry +; RV32I_PAIR-NEXT: addi sp, sp, -16 +; RV32I_PAIR-NEXT: .cfi_def_cfa_offset 16 +; RV32I_PAIR-NEXT: sw s3, 8(sp) # 4-byte Folded Spill +; RV32I_PAIR-NEXT: sw s2, 12(sp) # 4-byte Folded Spill +; RV32I_PAIR-NEXT: sw s5, 0(sp) # 4-byte Folded Spill +; RV32I_PAIR-NEXT: sw s4, 4(sp) # 4-byte Folded Spill +; RV32I_PAIR-NEXT: 
.cfi_offset s2, -4 +; RV32I_PAIR-NEXT: .cfi_offset s3, -8 +; RV32I_PAIR-NEXT: .cfi_offset s4, -12 +; RV32I_PAIR-NEXT: .cfi_offset s5, -16 +; RV32I_PAIR-NEXT: mips.lwp s3, s2, 0(a0) +; RV32I_PAIR-NEXT: mips.lwp s5, s4, 8(a0) +; RV32I_PAIR-NEXT: #APP +; RV32I_PAIR-NEXT: #NO_APP +; RV32I_PAIR-NEXT: lw s3, 8(sp) # 4-byte Folded Reload +; RV32I_PAIR-NEXT: lw s2, 12(sp) # 4-byte Folded Reload +; RV32I_PAIR-NEXT: lw s5, 0(sp) # 4-byte Folded Reload +; RV32I_PAIR-NEXT: lw s4, 4(sp) # 4-byte Folded Reload +; RV32I_PAIR-NEXT: .cfi_restore s2 +; RV32I_PAIR-NEXT: .cfi_restore s3 +; RV32I_PAIR-NEXT: .cfi_restore s4 +; RV32I_PAIR-NEXT: .cfi_restore s5 +; RV32I_PAIR-NEXT: addi sp, sp, 16 +; RV32I_PAIR-NEXT: .cfi_def_cfa_offset 0 +; RV32I_PAIR-NEXT: ret +; +; RV32D_PAIR-LABEL: testi: +; RV32D_PAIR: # %bb.0: # %entry +; RV32D_PAIR-NEXT: addi sp, sp, -16 +; RV32D_PAIR-NEXT: .cfi_def_cfa_offset 16 +; RV32D_PAIR-NEXT: sw s3, 8(sp) # 4-byte Folded Spill +; RV32D_PAIR-NEXT: sw s2, 12(sp) # 4-byte Folded Spill +; RV32D_PAIR-NEXT: sw s5, 0(sp) # 4-byte Folded Spill +; RV32D_PAIR-NEXT: sw s4, 4(sp) # 4-byte Folded Spill +; RV32D_PAIR-NEXT: .cfi_offset s2, -4 +; RV32D_PAIR-NEXT: .cfi_offset s3, -8 +; RV32D_PAIR-NEXT: .cfi_offset s4, -12 +; RV32D_PAIR-NEXT: .cfi_offset s5, -16 +; RV32D_PAIR-NEXT: mips.lwp s3, s2, 0(a0) +; RV32D_PAIR-NEXT: mips.lwp s5, s4, 8(a0) +; RV32D_PAIR-NEXT: #APP +; RV32D_PAIR-NEXT: #NO_APP +; RV32D_PAIR-NEXT: lw s3, 8(sp) # 4-byte Folded Reload +; RV32D_PAIR-NEXT: lw s2, 12(sp) # 4-byte Folded Reload +; RV32D_PAIR-NEXT: lw s5, 0(sp) # 4-byte Folded Reload +; RV32D_PAIR-NEXT: lw s4, 4(sp) # 4-byte Folded Reload +; RV32D_PAIR-NEXT: .cfi_restore s2 +; RV32D_PAIR-NEXT: .cfi_restore s3 +; RV32D_PAIR-NEXT: .cfi_restore s4 +; RV32D_PAIR-NEXT: .cfi_restore s5 +; RV32D_PAIR-NEXT: addi sp, sp, 16 +; RV32D_PAIR-NEXT: .cfi_def_cfa_offset 0 +; RV32D_PAIR-NEXT: ret +; +; RV64I_PAIR-LABEL: testi: +; RV64I_PAIR: # %bb.0: # %entry +; RV64I_PAIR-NEXT: addi sp, sp, -32 +; RV64I_PAIR-NEXT: .cfi_def_cfa_offset 32 +; RV64I_PAIR-NEXT: sd s3, 16(sp) # 8-byte Folded Spill +; RV64I_PAIR-NEXT: sd s2, 24(sp) # 8-byte Folded Spill +; RV64I_PAIR-NEXT: sd s5, 0(sp) # 8-byte Folded Spill +; RV64I_PAIR-NEXT: sd s4, 8(sp) # 8-byte Folded Spill +; RV64I_PAIR-NEXT: .cfi_offset s2, -8 +; RV64I_PAIR-NEXT: .cfi_offset s3, -16 +; RV64I_PAIR-NEXT: .cfi_offset s4, -24 +; RV64I_PAIR-NEXT: .cfi_offset s5, -32 +; RV64I_PAIR-NEXT: mips.ldp s3, s2, 0(a0) +; RV64I_PAIR-NEXT: ld s5, 16(a0) +; RV64I_PAIR-NEXT: ld s4, 24(a0) +; RV64I_PAIR-NEXT: #APP +; RV64I_PAIR-NEXT: #NO_APP +; RV64I_PAIR-NEXT: ld s3, 16(sp) # 8-byte Folded Reload +; RV64I_PAIR-NEXT: ld s2, 24(sp) # 8-byte Folded Reload +; RV64I_PAIR-NEXT: ld s5, 0(sp) # 8-byte Folded Reload +; RV64I_PAIR-NEXT: ld s4, 8(sp) # 8-byte Folded Reload +; RV64I_PAIR-NEXT: .cfi_restore s2 +; RV64I_PAIR-NEXT: .cfi_restore s3 +; RV64I_PAIR-NEXT: .cfi_restore s4 +; RV64I_PAIR-NEXT: .cfi_restore s5 +; RV64I_PAIR-NEXT: addi sp, sp, 32 +; RV64I_PAIR-NEXT: .cfi_def_cfa_offset 0 +; RV64I_PAIR-NEXT: ret +; +; RV64P_8700-LABEL: testi: +; RV64P_8700: # %bb.0: # %entry +; RV64P_8700-NEXT: addi sp, sp, -32 +; RV64P_8700-NEXT: .cfi_def_cfa_offset 32 +; RV64P_8700-NEXT: sd s3, 16(sp) # 8-byte Folded Spill +; RV64P_8700-NEXT: sd s2, 24(sp) # 8-byte Folded Spill +; RV64P_8700-NEXT: sd s5, 0(sp) # 8-byte Folded Spill +; RV64P_8700-NEXT: sd s4, 8(sp) # 8-byte Folded Spill +; RV64P_8700-NEXT: .cfi_offset s2, -8 +; RV64P_8700-NEXT: .cfi_offset s3, -16 +; RV64P_8700-NEXT: .cfi_offset s4, -24 +; RV64P_8700-NEXT: 
.cfi_offset s5, -32 +; RV64P_8700-NEXT: mips.ldp s3, s2, 0(a0) +; RV64P_8700-NEXT: ld s5, 16(a0) +; RV64P_8700-NEXT: ld s4, 24(a0) +; RV64P_8700-NEXT: #APP +; RV64P_8700-NEXT: #NO_APP +; RV64P_8700-NEXT: ld s3, 16(sp) # 8-byte Folded Reload +; RV64P_8700-NEXT: ld s2, 24(sp) # 8-byte Folded Reload +; RV64P_8700-NEXT: ld s5, 0(sp) # 8-byte Folded Reload +; RV64P_8700-NEXT: ld s4, 8(sp) # 8-byte Folded Reload +; RV64P_8700-NEXT: .cfi_restore s2 +; RV64P_8700-NEXT: .cfi_restore s3 +; RV64P_8700-NEXT: .cfi_restore s4 +; RV64P_8700-NEXT: .cfi_restore s5 +; RV64P_8700-NEXT: addi sp, sp, 32 +; RV64P_8700-NEXT: .cfi_def_cfa_offset 0 +; RV64P_8700-NEXT: ret +; +; RV64D_PAIR-LABEL: testi: +; RV64D_PAIR: # %bb.0: # %entry +; RV64D_PAIR-NEXT: addi sp, sp, -32 +; RV64D_PAIR-NEXT: .cfi_def_cfa_offset 32 +; RV64D_PAIR-NEXT: sd s3, 16(sp) # 8-byte Folded Spill +; RV64D_PAIR-NEXT: sd s2, 24(sp) # 8-byte Folded Spill +; RV64D_PAIR-NEXT: sd s5, 0(sp) # 8-byte Folded Spill +; RV64D_PAIR-NEXT: sd s4, 8(sp) # 8-byte Folded Spill +; RV64D_PAIR-NEXT: .cfi_offset s2, -8 +; RV64D_PAIR-NEXT: .cfi_offset s3, -16 +; RV64D_PAIR-NEXT: .cfi_offset s4, -24 +; RV64D_PAIR-NEXT: .cfi_offset s5, -32 +; RV64D_PAIR-NEXT: mips.ldp s3, s2, 0(a0) +; RV64D_PAIR-NEXT: ld s5, 16(a0) +; RV64D_PAIR-NEXT: ld s4, 24(a0) +; RV64D_PAIR-NEXT: #APP +; RV64D_PAIR-NEXT: #NO_APP +; RV64D_PAIR-NEXT: ld s3, 16(sp) # 8-byte Folded Reload +; RV64D_PAIR-NEXT: ld s2, 24(sp) # 8-byte Folded Reload +; RV64D_PAIR-NEXT: ld s5, 0(sp) # 8-byte Folded Reload +; RV64D_PAIR-NEXT: ld s4, 8(sp) # 8-byte Folded Reload +; RV64D_PAIR-NEXT: .cfi_restore s2 +; RV64D_PAIR-NEXT: .cfi_restore s3 +; RV64D_PAIR-NEXT: .cfi_restore s4 +; RV64D_PAIR-NEXT: .cfi_restore s5 +; RV64D_PAIR-NEXT: addi sp, sp, 32 +; RV64D_PAIR-NEXT: .cfi_def_cfa_offset 0 +; RV64D_PAIR-NEXT: ret +; +; RV64D_NOPAIR-LABEL: testi: +; RV64D_NOPAIR: # %bb.0: # %entry +; RV64D_NOPAIR-NEXT: addi sp, sp, -32 +; RV64D_NOPAIR-NEXT: .cfi_def_cfa_offset 32 +; RV64D_NOPAIR-NEXT: sd s2, 24(sp) # 8-byte Folded Spill +; RV64D_NOPAIR-NEXT: sd s3, 16(sp) # 8-byte Folded Spill +; RV64D_NOPAIR-NEXT: sd s4, 8(sp) # 8-byte Folded Spill +; RV64D_NOPAIR-NEXT: sd s5, 0(sp) # 8-byte Folded Spill +; RV64D_NOPAIR-NEXT: .cfi_offset s2, -8 +; RV64D_NOPAIR-NEXT: .cfi_offset s3, -16 +; RV64D_NOPAIR-NEXT: .cfi_offset s4, -24 +; RV64D_NOPAIR-NEXT: .cfi_offset s5, -32 +; RV64D_NOPAIR-NEXT: ld s3, 0(a0) +; RV64D_NOPAIR-NEXT: ld s2, 8(a0) +; RV64D_NOPAIR-NEXT: ld s5, 16(a0) +; RV64D_NOPAIR-NEXT: ld s4, 24(a0) +; RV64D_NOPAIR-NEXT: #APP +; RV64D_NOPAIR-NEXT: #NO_APP +; RV64D_NOPAIR-NEXT: ld s2, 24(sp) # 8-byte Folded Reload +; RV64D_NOPAIR-NEXT: ld s3, 16(sp) # 8-byte Folded Reload +; RV64D_NOPAIR-NEXT: ld s4, 8(sp) # 8-byte Folded Reload +; RV64D_NOPAIR-NEXT: ld s5, 0(sp) # 8-byte Folded Reload +; RV64D_NOPAIR-NEXT: .cfi_restore s2 +; RV64D_NOPAIR-NEXT: .cfi_restore s3 +; RV64D_NOPAIR-NEXT: .cfi_restore s4 +; RV64D_NOPAIR-NEXT: .cfi_restore s5 +; RV64D_NOPAIR-NEXT: addi sp, sp, 32 +; RV64D_NOPAIR-NEXT: .cfi_def_cfa_offset 0 +; RV64D_NOPAIR-NEXT: ret +entry: + %arrayidx = getelementptr inbounds ptr, ptr %a, i64 1 + %0 = load ptr, ptr %arrayidx, align 16 + %1 = load ptr, ptr %a, align 16 + %arrayidx2 = getelementptr inbounds ptr, ptr %a, i64 3 + %2 = load ptr, ptr %arrayidx2, align 16 + %arrayidx3 = getelementptr inbounds ptr, ptr %a, i64 2 + %3 = load ptr, ptr %arrayidx3, align 8 + tail call void asm sideeffect "", "{x18},{x19},{x20},{x21}"(ptr %0, ptr %1, ptr %2, ptr %3) + ret void +}