diff --git a/llvm/include/llvm/CodeGen/ModuloSchedule.h b/llvm/include/llvm/CodeGen/ModuloSchedule.h index fd424163f0d19..e9f0f089adfef 100644 --- a/llvm/include/llvm/CodeGen/ModuloSchedule.h +++ b/llvm/include/llvm/CodeGen/ModuloSchedule.h @@ -370,6 +370,78 @@ class PeelingModuloScheduleExpander { std::unique_ptr LoopInfo; }; +/// Expand the kernel using modulo variable expansion algorithm (MVE). +/// It unrolls the kernel enough to avoid overlap of register lifetime. +class ModuloScheduleExpanderMVE { +private: + using ValueMapTy = DenseMap; + using MBBVectorTy = SmallVectorImpl; + using InstrMapTy = DenseMap; + + ModuloSchedule &Schedule; + MachineFunction &MF; + const TargetSubtargetInfo &ST; + MachineRegisterInfo &MRI; + const TargetInstrInfo *TII = nullptr; + LiveIntervals &LIS; + + MachineBasicBlock *OrigKernel = nullptr; + MachineBasicBlock *OrigPreheader = nullptr; + MachineBasicBlock *OrigExit = nullptr; + MachineBasicBlock *Check = nullptr; + MachineBasicBlock *Prolog = nullptr; + MachineBasicBlock *NewKernel = nullptr; + MachineBasicBlock *Epilog = nullptr; + MachineBasicBlock *NewPreheader = nullptr; + MachineBasicBlock *NewExit = nullptr; + std::unique_ptr LoopInfo; + + /// The number of unroll required to avoid overlap of live ranges. + /// NumUnroll = 1 means no unrolling. + int NumUnroll; + + void calcNumUnroll(); + void generatePipelinedLoop(); + void generateProlog(SmallVectorImpl &VRMap); + void generatePhi(MachineInstr *OrigMI, int UnrollNum, + SmallVectorImpl &PrologVRMap, + SmallVectorImpl &KernelVRMap, + SmallVectorImpl &PhiVRMap); + void generateKernel(SmallVectorImpl &PrologVRMap, + SmallVectorImpl &KernelVRMap, + InstrMapTy &LastStage0Insts); + void generateEpilog(SmallVectorImpl &KernelVRMap, + SmallVectorImpl &EpilogVRMap, + InstrMapTy &LastStage0Insts); + void mergeRegUsesAfterPipeline(Register OrigReg, Register NewReg); + + MachineInstr *cloneInstr(MachineInstr *OldMI); + + void updateInstrDef(MachineInstr *NewMI, ValueMapTy &VRMap, bool LastDef); + + void generateKernelPhi(Register OrigLoopVal, Register NewLoopVal, + unsigned UnrollNum, + SmallVectorImpl &VRMapProlog, + SmallVectorImpl &VRMapPhi); + void updateInstrUse(MachineInstr *MI, int StageNum, int PhaseNum, + SmallVectorImpl &CurVRMap, + SmallVectorImpl *PrevVRMap); + + void insertCondBranch(MachineBasicBlock &MBB, int RequiredTC, + InstrMapTy &LastStage0Insts, + MachineBasicBlock &GreaterThan, + MachineBasicBlock &Otherwise); + +public: + ModuloScheduleExpanderMVE(MachineFunction &MF, ModuloSchedule &S, + LiveIntervals &LIS) + : Schedule(S), MF(MF), ST(MF.getSubtarget()), MRI(MF.getRegInfo()), + TII(ST.getInstrInfo()), LIS(LIS) {} + + void expand(); + static bool canApply(MachineLoop &L); +}; + /// Expander that simply annotates each scheduled instruction with a post-instr /// symbol that can be consumed by the ModuloScheduleTest pass. /// diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index d5b1df2114e9e..75cb17f357241 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -767,6 +767,26 @@ class TargetInstrInfo : public MCInstrInfo { createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB, SmallVectorImpl &Cond) = 0; + /// Create a condition to determine if the remaining trip count for a phase + /// is greater than TC. Some instructions such as comparisons may be + /// inserted at the bottom of MBB. 
All instructions expanded for the + /// phase must be inserted in MBB before calling this function. + /// LastStage0Insts is the map from the original instructions scheduled at + /// stage#0 to the expanded instructions for the last iteration of the + /// kernel. LastStage0Insts is intended to obtain the instruction that + /// refers the latest loop counter value. + /// + /// MBB can also be a predecessor of the prologue block. Then + /// LastStage0Insts must be empty and the compared value is the initial + /// value of the trip count. + virtual void createRemainingIterationsGreaterCondition( + int TC, MachineBasicBlock &MBB, SmallVectorImpl &Cond, + DenseMap &LastStage0Insts) { + llvm_unreachable( + "Target didn't implement " + "PipelinerLoopInfo::createRemainingIterationsGreaterCondition!"); + } + /// Modify the loop such that the trip count is /// OriginalTC + TripCountAdjust. virtual void adjustTripCount(int TripCountAdjust) = 0; @@ -780,6 +800,10 @@ class TargetInstrInfo : public MCInstrInfo { /// Once this function is called, no other functions on this object are /// valid; the loop has been removed. virtual void disposed() = 0; + + /// Return true if the target can expand pipelined schedule with modulo + /// variable expansion. + virtual bool isMVEExpanderSupported() { return false; } }; /// Analyze loop L, which must be a single-basic-block loop, and if the diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index 4f7d9d070cee6..5ef67e1a005a9 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -192,6 +192,10 @@ static cl::opt cl::desc("Margin representing the unused percentage of " "the register pressure limit")); +static cl::opt + MVECodeGen("pipeliner-mve-cg", cl::Hidden, cl::init(false), + cl::desc("Use the MVE code generator for software pipelining")); + namespace llvm { // A command line option to enable the CopyToPhi DAG mutation. @@ -677,6 +681,11 @@ void SwingSchedulerDAG::schedule() { if (ExperimentalCodeGen && NewInstrChanges.empty()) { PeelingModuloScheduleExpander MSE(MF, MS, &LIS); MSE.expand(); + } else if (MVECodeGen && NewInstrChanges.empty() && + LoopPipelinerInfo->isMVEExpanderSupported() && + ModuloScheduleExpanderMVE::canApply(Loop)) { + ModuloScheduleExpanderMVE MSE(MF, MS, LIS); + MSE.expand(); } else { ModuloScheduleExpander MSE(MF, MS, LIS, std::move(NewInstrChanges)); MSE.expand(); diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp index b912112b16362..0aed235ec39b5 100644 --- a/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -22,6 +22,10 @@ #define DEBUG_TYPE "pipeliner" using namespace llvm; +static cl::opt SwapBranchTargetsMVE( + "pipeliner-swap-branch-targets-mve", cl::Hidden, cl::init(false), + cl::desc("Swap target blocks of a conditional branch for MVE expander")); + void ModuloSchedule::print(raw_ostream &OS) { for (MachineInstr *MI : ScheduledInstrs) OS << "[stage " << getStage(MI) << " @" << getCycle(MI) << "c] " << *MI; @@ -2097,6 +2101,642 @@ void PeelingModuloScheduleExpander::validateAgainstModuloScheduleExpander() { MSE.cleanup(); } +MachineInstr *ModuloScheduleExpanderMVE::cloneInstr(MachineInstr *OldMI) { + MachineInstr *NewMI = MF.CloneMachineInstr(OldMI); + + // TODO: Offset information needs to be corrected. + NewMI->dropMemRefs(MF); + + return NewMI; +} + +/// Create a dedicated exit for Loop. Exit is the original exit for Loop. +/// If it is already dedicated exit, return it. 
Otherwise, insert a new +/// block between them and return the new block. +static MachineBasicBlock *createDedicatedExit(MachineBasicBlock *Loop, + MachineBasicBlock *Exit) { + if (Exit->pred_size() == 1) + return Exit; + + MachineFunction *MF = Loop->getParent(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + + MachineBasicBlock *NewExit = + MF->CreateMachineBasicBlock(Loop->getBasicBlock()); + MF->insert(Loop->getIterator(), NewExit); + + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; + SmallVector Cond; + TII->analyzeBranch(*Loop, TBB, FBB, Cond); + if (TBB == Loop) + FBB = NewExit; + else if (FBB == Loop) + TBB = NewExit; + else + llvm_unreachable("unexpected loop structure"); + TII->removeBranch(*Loop); + TII->insertBranch(*Loop, TBB, FBB, Cond, DebugLoc()); + Loop->replaceSuccessor(Exit, NewExit); + TII->insertUnconditionalBranch(*NewExit, Exit, DebugLoc()); + NewExit->addSuccessor(Exit); + + Exit->replacePhiUsesWith(Loop, NewExit); + + return NewExit; +} + +/// Insert branch code into the end of MBB. It branches to GreaterThan if the +/// remaining trip count for instructions in LastStage0Insts is greater than +/// RequiredTC, and to Otherwise otherwise. +void ModuloScheduleExpanderMVE::insertCondBranch(MachineBasicBlock &MBB, + int RequiredTC, + InstrMapTy &LastStage0Insts, + MachineBasicBlock &GreaterThan, + MachineBasicBlock &Otherwise) { + SmallVector Cond; + LoopInfo->createRemainingIterationsGreaterCondition(RequiredTC, MBB, Cond, + LastStage0Insts); + + if (SwapBranchTargetsMVE) { + // Set SwapBranchTargetsMVE to true if a target prefers to replace TBB and + // FBB for optimal performance. + if (TII->reverseBranchCondition(Cond)) + llvm_unreachable("can not reverse branch condition"); + TII->insertBranch(MBB, &Otherwise, &GreaterThan, Cond, DebugLoc()); + } else { + TII->insertBranch(MBB, &GreaterThan, &Otherwise, Cond, DebugLoc()); + } +} + +/// Generate a pipelined loop that is unrolled by using MVE algorithm and any +/// other necessary blocks. The control flow is modified to execute the +/// pipelined loop if the trip count satisfies the condition, otherwise the +/// original loop. The original loop is also used to execute the remainder +/// iterations which occur due to unrolling. +void ModuloScheduleExpanderMVE::generatePipelinedLoop() { + // The control flow for pipelining with MVE: + // + // OrigPreheader: + // // The block that is originally the loop preheader + // goto Check + // + // Check: + // // Check whether the trip count satisfies the requirements to pipeline. + // if (LoopCounter > NumStages + NumUnroll - 2) + // // The minimum number of iterations to pipeline = + // // iterations executed in prolog/epilog (NumStages-1) + + // // iterations executed in one kernel run (NumUnroll) + // goto Prolog + // // fallback to the original loop + // goto NewPreheader + // + // Prolog: + // // All prolog stages. There are no direct branches to the epilogue. + // goto NewKernel + // + // NewKernel: + // // NumUnroll copies of the kernel + // if (LoopCounter > MVE-1) + // goto NewKernel + // goto Epilog + // + // Epilog: + // // All epilog stages. + // if (LoopCounter > 0) + // // The remainder is executed in the original loop + // goto NewPreheader + // goto NewExit + // + // NewPreheader: + // // Newly created preheader for the original loop. + // // The initial values of the phis in the loop are merged from two paths. 
+ // NewInitVal = Phi OrigInitVal, Check, PipelineLastVal, Epilog + // goto OrigKernel + // + // OrigKernel: + // // The original loop block. + // if (LoopCounter != 0) + // goto OrigKernel + // goto NewExit + // + // NewExit: + // // Newly created dedicated exit for the original loop. + // // Merge values which are referenced after the loop + // Merged = Phi OrigVal, OrigKernel, PipelineVal, Epilog + // goto OrigExit + // + // OrigExit: + // // The block that is originally the loop exit. + // // If it is already deicated exit, NewExit is not created. + + // An example of where each stage is executed: + // Assume #Stages 3, #MVE 4, #Iterations 12 + // Iter 0 1 2 3 4 5 6 7 8 9 10-11 + // ------------------------------------------------- + // Stage 0 Prolog#0 + // Stage 1 0 Prolog#1 + // Stage 2 1 0 Kernel Unroll#0 Iter#0 + // Stage 2 1 0 Kernel Unroll#1 Iter#0 + // Stage 2 1 0 Kernel Unroll#2 Iter#0 + // Stage 2 1 0 Kernel Unroll#3 Iter#0 + // Stage 2 1 0 Kernel Unroll#0 Iter#1 + // Stage 2 1 0 Kernel Unroll#1 Iter#1 + // Stage 2 1 0 Kernel Unroll#2 Iter#1 + // Stage 2 1 0 Kernel Unroll#3 Iter#1 + // Stage 2 1 Epilog#0 + // Stage 2 Epilog#1 + // Stage 0-2 OrigKernel + + LoopInfo = TII->analyzeLoopForPipelining(OrigKernel); + assert(LoopInfo && "Must be able to analyze loop!"); + + calcNumUnroll(); + + Check = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock()); + Prolog = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock()); + NewKernel = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock()); + Epilog = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock()); + NewPreheader = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock()); + + MF.insert(OrigKernel->getIterator(), Check); + MF.insert(OrigKernel->getIterator(), Prolog); + MF.insert(OrigKernel->getIterator(), NewKernel); + MF.insert(OrigKernel->getIterator(), Epilog); + MF.insert(OrigKernel->getIterator(), NewPreheader); + + NewExit = createDedicatedExit(OrigKernel, OrigExit); + + NewPreheader->transferSuccessorsAndUpdatePHIs(OrigPreheader); + TII->insertUnconditionalBranch(*NewPreheader, OrigKernel, DebugLoc()); + + OrigPreheader->addSuccessor(Check); + TII->removeBranch(*OrigPreheader); + TII->insertUnconditionalBranch(*OrigPreheader, Check, DebugLoc()); + + Check->addSuccessor(Prolog); + Check->addSuccessor(NewPreheader); + + Prolog->addSuccessor(NewKernel); + + NewKernel->addSuccessor(NewKernel); + NewKernel->addSuccessor(Epilog); + + Epilog->addSuccessor(NewPreheader); + Epilog->addSuccessor(NewExit); + + InstrMapTy LastStage0Insts; + insertCondBranch(*Check, Schedule.getNumStages() + NumUnroll - 2, + LastStage0Insts, *Prolog, *NewPreheader); + + // VRMaps map (prolog/kernel/epilog phase#, original register#) to new + // register# + SmallVector PrologVRMap, KernelVRMap, EpilogVRMap; + generateProlog(PrologVRMap); + generateKernel(PrologVRMap, KernelVRMap, LastStage0Insts); + generateEpilog(KernelVRMap, EpilogVRMap, LastStage0Insts); +} + +/// Replace MI's use operands according to the maps. +void ModuloScheduleExpanderMVE::updateInstrUse( + MachineInstr *MI, int StageNum, int PhaseNum, + SmallVectorImpl &CurVRMap, + SmallVectorImpl *PrevVRMap) { + // If MI is in the prolog/kernel/epilog block, CurVRMap is + // PrologVRMap/KernelVRMap/EpilogVRMap respectively. + // PrevVRMap is nullptr/PhiVRMap/KernelVRMap respectively. + // Refer to the appropriate map according to the stage difference between + // MI and the definition of an operand. 
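+  // For example, if MI is a kernel-phase-0 instruction at stage 2 and an
+  // operand is defined (without a phi) at stage 0, DiffStage is 2, which
+  // exceeds PhaseNum, so the value is taken from the previous kernel
+  // iteration through PrevVRMap (PhiVRMap) rather than from CurVRMap.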
+ + for (MachineOperand &UseMO : MI->uses()) { + if (!UseMO.isReg() || !UseMO.getReg().isVirtual()) + continue; + int DiffStage = 0; + Register OrigReg = UseMO.getReg(); + MachineInstr *DefInst = MRI.getVRegDef(OrigReg); + if (!DefInst || DefInst->getParent() != OrigKernel) + continue; + unsigned InitReg = 0; + unsigned DefReg = OrigReg; + if (DefInst->isPHI()) { + ++DiffStage; + unsigned LoopReg; + getPhiRegs(*DefInst, OrigKernel, InitReg, LoopReg); + // LoopReg is guaranteed to be defined within the loop by canApply() + DefReg = LoopReg; + DefInst = MRI.getVRegDef(LoopReg); + } + unsigned DefStageNum = Schedule.getStage(DefInst); + DiffStage += StageNum - DefStageNum; + Register NewReg; + if (PhaseNum >= DiffStage && CurVRMap[PhaseNum - DiffStage].count(DefReg)) + // NewReg is defined in a previous phase of the same block + NewReg = CurVRMap[PhaseNum - DiffStage][DefReg]; + else if (!PrevVRMap) + // Since this is the first iteration, refer the initial register of the + // loop + NewReg = InitReg; + else + // Cases where DiffStage is larger than PhaseNum. + // If MI is in the kernel block, the value is defined by the previous + // iteration and PhiVRMap is referenced. If MI is in the epilog block, the + // value is defined in the kernel block and KernelVRMap is referenced. + NewReg = (*PrevVRMap)[PrevVRMap->size() - (DiffStage - PhaseNum)][DefReg]; + + const TargetRegisterClass *NRC = + MRI.constrainRegClass(NewReg, MRI.getRegClass(OrigReg)); + if (NRC) + UseMO.setReg(NewReg); + else { + Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg)); + BuildMI(*OrigKernel, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY), + SplitReg) + .addReg(NewReg); + UseMO.setReg(SplitReg); + } + } +} + +/// Return a phi if Reg is referenced by the phi. +/// canApply() guarantees that at most only one such phi exists. +static MachineInstr *getLoopPhiUser(Register Reg, MachineBasicBlock *Loop) { + for (MachineInstr &Phi : Loop->phis()) { + unsigned InitVal, LoopVal; + getPhiRegs(Phi, Loop, InitVal, LoopVal); + if (LoopVal == Reg) + return Φ + } + return nullptr; +} + +/// Generate phis for registers defined by OrigMI. +void ModuloScheduleExpanderMVE::generatePhi( + MachineInstr *OrigMI, int UnrollNum, + SmallVectorImpl &PrologVRMap, + SmallVectorImpl &KernelVRMap, + SmallVectorImpl &PhiVRMap) { + int StageNum = Schedule.getStage(OrigMI); + bool UsePrologReg; + if (Schedule.getNumStages() - NumUnroll + UnrollNum - 1 >= StageNum) + UsePrologReg = true; + else if (Schedule.getNumStages() - NumUnroll + UnrollNum == StageNum) + UsePrologReg = false; + else + return; + + // Examples that show which stages are merged by phi. 
+ // Meaning of the symbol following the stage number: + // a/b: Stages with the same letter are merged (UsePrologReg == true) + // +: Merged with the initial value (UsePrologReg == false) + // *: No phis required + // + // #Stages 3, #MVE 4 + // Iter 0 1 2 3 4 5 6 7 8 + // ----------------------------------------- + // Stage 0a Prolog#0 + // Stage 1a 0b Prolog#1 + // Stage 2* 1* 0* Kernel Unroll#0 + // Stage 2* 1* 0+ Kernel Unroll#1 + // Stage 2* 1+ 0a Kernel Unroll#2 + // Stage 2+ 1a 0b Kernel Unroll#3 + // + // #Stages 3, #MVE 2 + // Iter 0 1 2 3 4 5 6 7 8 + // ----------------------------------------- + // Stage 0a Prolog#0 + // Stage 1a 0b Prolog#1 + // Stage 2* 1+ 0a Kernel Unroll#0 + // Stage 2+ 1a 0b Kernel Unroll#1 + // + // #Stages 3, #MVE 1 + // Iter 0 1 2 3 4 5 6 7 8 + // ----------------------------------------- + // Stage 0* Prolog#0 + // Stage 1a 0b Prolog#1 + // Stage 2+ 1a 0b Kernel Unroll#0 + + for (MachineOperand &DefMO : OrigMI->defs()) { + if (!DefMO.isReg() || DefMO.isDead()) + continue; + Register OrigReg = DefMO.getReg(); + auto NewReg = KernelVRMap[UnrollNum].find(OrigReg); + if (NewReg == KernelVRMap[UnrollNum].end()) + continue; + Register CorrespondReg; + if (UsePrologReg) { + int PrologNum = Schedule.getNumStages() - NumUnroll + UnrollNum - 1; + CorrespondReg = PrologVRMap[PrologNum][OrigReg]; + } else { + MachineInstr *Phi = getLoopPhiUser(OrigReg, OrigKernel); + if (!Phi) + continue; + CorrespondReg = getInitPhiReg(*Phi, OrigKernel); + } + + assert(CorrespondReg.isValid()); + Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg)); + BuildMI(*NewKernel, NewKernel->getFirstNonPHI(), DebugLoc(), + TII->get(TargetOpcode::PHI), PhiReg) + .addReg(NewReg->second) + .addMBB(NewKernel) + .addReg(CorrespondReg) + .addMBB(Prolog); + PhiVRMap[UnrollNum][OrigReg] = PhiReg; + } +} + +static void replacePhiSrc(MachineInstr &Phi, Register OrigReg, Register NewReg, + MachineBasicBlock *NewMBB) { + for (unsigned Idx = 1; Idx < Phi.getNumOperands(); Idx += 2) { + if (Phi.getOperand(Idx).getReg() == OrigReg) { + Phi.getOperand(Idx).setReg(NewReg); + Phi.getOperand(Idx + 1).setMBB(NewMBB); + return; + } + } +} + +/// Generate phis that merge values from multiple routes +void ModuloScheduleExpanderMVE::mergeRegUsesAfterPipeline(Register OrigReg, + Register NewReg) { + SmallVector UsesAfterLoop; + SmallVector LoopPhis; + for (MachineRegisterInfo::use_iterator I = MRI.use_begin(OrigReg), + E = MRI.use_end(); + I != E; ++I) { + MachineOperand &O = *I; + if (O.getParent()->getParent() != OrigKernel && + O.getParent()->getParent() != Prolog && + O.getParent()->getParent() != NewKernel && + O.getParent()->getParent() != Epilog) + UsesAfterLoop.push_back(&O); + if (O.getParent()->getParent() == OrigKernel && O.getParent()->isPHI()) + LoopPhis.push_back(O.getParent()); + } + + // Merge the route that only execute the pipelined loop (when there are no + // remaining iterations) with the route that execute the original loop. 
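+  // The phi inserted below in NewExit has the form
+  //   Merged = PHI OrigReg, OrigKernel, NewReg, Epilog
+  // so every use after the loop observes the value from whichever path was
+  // actually taken.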
+ if (!UsesAfterLoop.empty()) { + Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg)); + BuildMI(*NewExit, NewExit->getFirstNonPHI(), DebugLoc(), + TII->get(TargetOpcode::PHI), PhiReg) + .addReg(OrigReg) + .addMBB(OrigKernel) + .addReg(NewReg) + .addMBB(Epilog); + + for (MachineOperand *MO : UsesAfterLoop) + MO->setReg(PhiReg); + + if (!LIS.hasInterval(PhiReg)) + LIS.createEmptyInterval(PhiReg); + } + + // Merge routes from the pipelined loop and the bypassed route before the + // original loop + if (!LoopPhis.empty()) { + for (MachineInstr *Phi : LoopPhis) { + unsigned InitReg, LoopReg; + getPhiRegs(*Phi, OrigKernel, InitReg, LoopReg); + Register NewInit = MRI.createVirtualRegister(MRI.getRegClass(InitReg)); + BuildMI(*NewPreheader, NewPreheader->getFirstNonPHI(), Phi->getDebugLoc(), + TII->get(TargetOpcode::PHI), NewInit) + .addReg(InitReg) + .addMBB(Check) + .addReg(NewReg) + .addMBB(Epilog); + replacePhiSrc(*Phi, InitReg, NewInit, NewPreheader); + } + } +} + +void ModuloScheduleExpanderMVE::generateProlog( + SmallVectorImpl &PrologVRMap) { + PrologVRMap.clear(); + PrologVRMap.resize(Schedule.getNumStages() - 1); + DenseMap> NewMIMap; + for (int PrologNum = 0; PrologNum < Schedule.getNumStages() - 1; + ++PrologNum) { + for (MachineInstr *MI : Schedule.getInstructions()) { + if (MI->isPHI()) + continue; + int StageNum = Schedule.getStage(MI); + if (StageNum > PrologNum) + continue; + MachineInstr *NewMI = cloneInstr(MI); + updateInstrDef(NewMI, PrologVRMap[PrologNum], false); + NewMIMap[NewMI] = {PrologNum, StageNum}; + Prolog->push_back(NewMI); + } + } + + for (auto I : NewMIMap) { + MachineInstr *MI = I.first; + int PrologNum = I.second.first; + int StageNum = I.second.second; + updateInstrUse(MI, StageNum, PrologNum, PrologVRMap, nullptr); + } + + LLVM_DEBUG({ + dbgs() << "prolog:\n"; + Prolog->dump(); + }); +} + +void ModuloScheduleExpanderMVE::generateKernel( + SmallVectorImpl &PrologVRMap, + SmallVectorImpl &KernelVRMap, InstrMapTy &LastStage0Insts) { + KernelVRMap.clear(); + KernelVRMap.resize(NumUnroll); + SmallVector PhiVRMap; + PhiVRMap.resize(NumUnroll); + DenseMap> NewMIMap; + DenseMap MIMapLastStage0; + for (int UnrollNum = 0; UnrollNum < NumUnroll; ++UnrollNum) { + for (MachineInstr *MI : Schedule.getInstructions()) { + if (MI->isPHI()) + continue; + int StageNum = Schedule.getStage(MI); + MachineInstr *NewMI = cloneInstr(MI); + if (UnrollNum == NumUnroll - 1) + LastStage0Insts[MI] = NewMI; + updateInstrDef(NewMI, KernelVRMap[UnrollNum], + (UnrollNum == NumUnroll - 1 && StageNum == 0)); + generatePhi(MI, UnrollNum, PrologVRMap, KernelVRMap, PhiVRMap); + NewMIMap[NewMI] = {UnrollNum, StageNum}; + NewKernel->push_back(NewMI); + } + } + + for (auto I : NewMIMap) { + MachineInstr *MI = I.first; + int UnrollNum = I.second.first; + int StageNum = I.second.second; + updateInstrUse(MI, StageNum, UnrollNum, KernelVRMap, &PhiVRMap); + } + + // If remaining trip count is greater than NumUnroll-1, loop continues + insertCondBranch(*NewKernel, NumUnroll - 1, LastStage0Insts, *NewKernel, + *Epilog); + + LLVM_DEBUG({ + dbgs() << "kernel:\n"; + NewKernel->dump(); + }); +} + +void ModuloScheduleExpanderMVE::generateEpilog( + SmallVectorImpl &KernelVRMap, + SmallVectorImpl &EpilogVRMap, InstrMapTy &LastStage0Insts) { + EpilogVRMap.clear(); + EpilogVRMap.resize(Schedule.getNumStages() - 1); + DenseMap> NewMIMap; + for (int EpilogNum = 0; EpilogNum < Schedule.getNumStages() - 1; + ++EpilogNum) { + for (MachineInstr *MI : Schedule.getInstructions()) { + if (MI->isPHI()) + 
continue; + int StageNum = Schedule.getStage(MI); + if (StageNum <= EpilogNum) + continue; + MachineInstr *NewMI = cloneInstr(MI); + updateInstrDef(NewMI, EpilogVRMap[EpilogNum], StageNum - 1 == EpilogNum); + NewMIMap[NewMI] = {EpilogNum, StageNum}; + Epilog->push_back(NewMI); + } + } + + for (auto I : NewMIMap) { + MachineInstr *MI = I.first; + int EpilogNum = I.second.first; + int StageNum = I.second.second; + updateInstrUse(MI, StageNum, EpilogNum, EpilogVRMap, &KernelVRMap); + } + + // If there are remaining iterations, they are executed in the original loop. + // Instructions related to loop control, such as loop counter comparison, + // are indicated by shouldIgnoreForPipelining() and are assumed to be placed + // in stage 0. Thus, the map is for the last one in the kernel. + insertCondBranch(*Epilog, 0, LastStage0Insts, *NewPreheader, *NewExit); + + LLVM_DEBUG({ + dbgs() << "epilog:\n"; + Epilog->dump(); + }); +} + +/// Calculate the number of unroll required and set it to NumUnroll +void ModuloScheduleExpanderMVE::calcNumUnroll() { + DenseMap Inst2Idx; + NumUnroll = 1; + for (unsigned I = 0; I < Schedule.getInstructions().size(); ++I) + Inst2Idx[Schedule.getInstructions()[I]] = I; + + for (MachineInstr *MI : Schedule.getInstructions()) { + if (MI->isPHI()) + continue; + int StageNum = Schedule.getStage(MI); + for (const MachineOperand &MO : MI->uses()) { + if (!MO.isReg() || !MO.getReg().isVirtual()) + continue; + MachineInstr *DefMI = MRI.getVRegDef(MO.getReg()); + if (DefMI->getParent() != OrigKernel) + continue; + + int NumUnrollLocal = 1; + if (DefMI->isPHI()) { + ++NumUnrollLocal; + // canApply() guarantees that DefMI is not phi and is an instruction in + // the loop + DefMI = MRI.getVRegDef(getLoopPhiReg(*DefMI, OrigKernel)); + } + NumUnrollLocal += StageNum - Schedule.getStage(DefMI); + if (Inst2Idx[MI] <= Inst2Idx[DefMI]) + --NumUnrollLocal; + NumUnroll = std::max(NumUnroll, NumUnrollLocal); + } + } + LLVM_DEBUG(dbgs() << "NumUnroll: " << NumUnroll << "\n"); +} + +/// Create new virtual registers for definitions of NewMI and update NewMI. +/// If the definitions are referenced after the pipelined loop, phis are +/// created to merge with other routes. 
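+/// LastDef is set by the callers only for the copy that provides the final
+/// value of the original register in the expanded code (the last kernel
+/// unroll for stage-0 instructions, or the last epilog copy for later
+/// stages); only such definitions go through mergeRegUsesAfterPipeline().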
+void ModuloScheduleExpanderMVE::updateInstrDef(MachineInstr *NewMI, + ValueMapTy &VRMap, + bool LastDef) { + for (MachineOperand &MO : NewMI->operands()) { + if (!MO.isReg() || !MO.getReg().isVirtual() || !MO.isDef()) + continue; + Register Reg = MO.getReg(); + const TargetRegisterClass *RC = MRI.getRegClass(Reg); + Register NewReg = MRI.createVirtualRegister(RC); + MO.setReg(NewReg); + VRMap[Reg] = NewReg; + if (LastDef) + mergeRegUsesAfterPipeline(Reg, NewReg); + } +} + +void ModuloScheduleExpanderMVE::expand() { + OrigKernel = Schedule.getLoop()->getTopBlock(); + OrigPreheader = Schedule.getLoop()->getLoopPreheader(); + OrigExit = Schedule.getLoop()->getExitBlock(); + + LLVM_DEBUG(Schedule.dump()); + + generatePipelinedLoop(); +} + +/// Check if ModuloScheduleExpanderMVE can be applied to L +bool ModuloScheduleExpanderMVE::canApply(MachineLoop &L) { + if (!L.getExitBlock()) { + LLVM_DEBUG( + dbgs() << "Can not apply MVE expander: No single exit block.\n";); + return false; + } + + MachineBasicBlock *BB = L.getTopBlock(); + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + + // Put some constraints on the operands of the phis to simplify the + // transformation + DenseSet UsedByPhi; + for (MachineInstr &MI : BB->phis()) { + // Registers defined by phis must be used only inside the loop and be never + // used by phis. + for (MachineOperand &MO : MI.defs()) + if (MO.isReg()) + for (MachineInstr &Ref : MRI.use_instructions(MO.getReg())) + if (Ref.getParent() != BB || Ref.isPHI()) { + LLVM_DEBUG(dbgs() + << "Can not apply MVE expander: A phi result is " + "referenced outside of the loop or by phi.\n";); + return false; + } + + // A source register from the loop block must be defined inside the loop. + // A register defined inside the loop must be referenced by only one phi at + // most. 
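+    // For example, a loop in which one loop-defined value feeds two phis is
+    // rejected (register names are illustrative):
+    //   %a = PHI %init0, %preheader, %v, %loop
+    //   %b = PHI %init1, %preheader, %v, %loop
+    //   %v = ... ; defined in %loop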
+ unsigned InitVal, LoopVal; + getPhiRegs(MI, MI.getParent(), InitVal, LoopVal); + if (!Register(LoopVal).isVirtual() || + MRI.getVRegDef(LoopVal)->getParent() != BB) { + LLVM_DEBUG( + dbgs() << "Can not apply MVE expander: A phi source value coming " + "from the loop is not defined in the loop.\n";); + return false; + } + if (UsedByPhi.count(LoopVal)) { + LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A value defined in the " + "loop is referenced by two or more phis.\n";); + return false; + } + UsedByPhi.insert(LoopVal); + } + + return true; +} + //===----------------------------------------------------------------------===// // ModuloScheduleTestPass implementation //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 7d540efe2b41e..a5135b78bded9 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -17,6 +17,7 @@ #include "AArch64PointerAuth.h" #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64AddressingModes.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" #include "Utils/AArch64BaseInfo.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" @@ -9582,18 +9583,49 @@ AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI, namespace { class AArch64PipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo { - MachineInstr *PredBranch; + MachineFunction *MF; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo &MRI; + + /// The block of the loop + MachineBasicBlock *LoopBB; + /// The conditional branch of the loop + MachineInstr *CondBranch; + /// The compare instruction for loop control + MachineInstr *Comp; + /// The number of the operand of the loop counter value in Comp + unsigned CompCounterOprNum; + /// The instruction that updates the loop counter value + MachineInstr *Update; + /// The number of the operand of the loop counter value in Update + unsigned UpdateCounterOprNum; + /// The initial value of the loop counter + Register Init; + /// True iff Update is a predecessor of Comp + bool IsUpdatePriorComp; + + /// The normalized condition used by createTripCountGreaterCondition() SmallVector Cond; public: - AArch64PipelinerLoopInfo(MachineInstr *PredBranch, + AArch64PipelinerLoopInfo(MachineBasicBlock *LoopBB, MachineInstr *CondBranch, + MachineInstr *Comp, unsigned CompCounterOprNum, + MachineInstr *Update, unsigned UpdateCounterOprNum, + Register Init, bool IsUpdatePriorComp, const SmallVectorImpl &Cond) - : PredBranch(PredBranch), Cond(Cond.begin(), Cond.end()) {} + : MF(Comp->getParent()->getParent()), + TII(MF->getSubtarget().getInstrInfo()), + TRI(MF->getSubtarget().getRegisterInfo()), MRI(MF->getRegInfo()), + LoopBB(LoopBB), CondBranch(CondBranch), Comp(Comp), + CompCounterOprNum(CompCounterOprNum), Update(Update), + UpdateCounterOprNum(UpdateCounterOprNum), Init(Init), + IsUpdatePriorComp(IsUpdatePriorComp), Cond(Cond.begin(), Cond.end()) {} bool shouldIgnoreForPipelining(const MachineInstr *MI) const override { // Make the instructions for loop control be placed in stage 0. - // The predecessors of PredBranch are considered by the caller. - return MI == PredBranch; + // The predecessors of Comp are considered by the caller. 
+ return MI == Comp; } std::optional createTripCountGreaterCondition( @@ -9606,31 +9638,277 @@ class AArch64PipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo { return {}; } + void createRemainingIterationsGreaterCondition( + int TC, MachineBasicBlock &MBB, SmallVectorImpl &Cond, + DenseMap &LastStage0Insts) override; + void setPreheader(MachineBasicBlock *NewPreheader) override {} void adjustTripCount(int TripCountAdjust) override {} void disposed() override {} + bool isMVEExpanderSupported() override { return true; } }; } // namespace -static bool isCompareAndBranch(unsigned Opcode) { - switch (Opcode) { - case AArch64::CBZW: - case AArch64::CBZX: - case AArch64::CBNZW: - case AArch64::CBNZX: - case AArch64::TBZW: - case AArch64::TBZX: - case AArch64::TBNZW: - case AArch64::TBNZX: - return true; +/// Clone an instruction from MI. The register of ReplaceOprNum-th operand +/// is replaced by ReplaceReg. The output register is newly created. +/// The other operands are unchanged from MI. +static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum, + Register ReplaceReg, MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertTo) { + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo(); + const TargetRegisterInfo *TRI = + MBB.getParent()->getSubtarget().getRegisterInfo(); + MachineInstr *NewMI = MBB.getParent()->CloneMachineInstr(MI); + Register Result = 0; + for (unsigned I = 0; I < NewMI->getNumOperands(); ++I) { + if (I == 0 && NewMI->getOperand(0).getReg().isVirtual()) { + Result = MRI.createVirtualRegister( + MRI.getRegClass(NewMI->getOperand(0).getReg())); + NewMI->getOperand(I).setReg(Result); + } else if (I == ReplaceOprNum) { + MRI.constrainRegClass( + ReplaceReg, + TII->getRegClass(NewMI->getDesc(), I, TRI, *MBB.getParent())); + NewMI->getOperand(I).setReg(ReplaceReg); + } } - return false; + MBB.insert(InsertTo, NewMI); + return Result; +} + +void AArch64PipelinerLoopInfo::createRemainingIterationsGreaterCondition( + int TC, MachineBasicBlock &MBB, SmallVectorImpl &Cond, + DenseMap &LastStage0Insts) { + // Create and accumulate conditions for next TC iterations. + // Example: + // SUBSXrr N, counter, implicit-def $nzcv # compare instruction for the last + // # iteration of the kernel + // + // # insert the following instructions + // cond = CSINCXr 0, 0, C, implicit $nzcv + // counter = ADDXri counter, 1 # clone from this->Update + // SUBSXrr n, counter, implicit-def $nzcv # clone from this->Comp + // cond = CSINCXr cond, cond, C, implicit $nzcv + // ... (repeat TC times) + // SUBSXri cond, 0, implicit-def $nzcv + + assert(CondBranch->getOpcode() == AArch64::Bcc); + // CondCode to exit the loop + AArch64CC::CondCode CC = + (AArch64CC::CondCode)CondBranch->getOperand(0).getImm(); + if (CondBranch->getOperand(1).getMBB() == LoopBB) + CC = AArch64CC::getInvertedCondCode(CC); + + // Accumulate conditions to exit the loop + Register AccCond = AArch64::XZR; + + // If CC holds, CurCond+1 is returned; otherwise CurCond is returned. 
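+  // AccCond therefore counts how many of the TC+1 cloned comparisons signal
+  // loop exit; it stays zero only if the remaining trip count is greater
+  // than TC, which is what the final SUBSXri/EQ check below tests.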
+ auto AccumulateCond = [&](Register CurCond, + AArch64CC::CondCode CC) -> Register { + Register NewCond = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass); + BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::CSINCXr)) + .addReg(NewCond, RegState::Define) + .addReg(CurCond) + .addReg(CurCond) + .addImm(AArch64CC::getInvertedCondCode(CC)); + return NewCond; + }; + + if (!LastStage0Insts.empty() && LastStage0Insts[Comp]->getParent() == &MBB) { + // Update and Comp for I==0 are already exists in MBB + // (MBB is an unrolled kernel) + Register Counter; + for (int I = 0; I <= TC; ++I) { + Register NextCounter; + if (I != 0) + NextCounter = + cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end()); + + AccCond = AccumulateCond(AccCond, CC); + + if (I != TC) { + if (I == 0) { + if (Update != Comp && IsUpdatePriorComp) { + Counter = + LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg(); + NextCounter = cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, + MBB.end()); + } else { + // can use already calculated value + NextCounter = LastStage0Insts[Update]->getOperand(0).getReg(); + } + } else if (Update != Comp) { + NextCounter = + cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end()); + } + } + Counter = NextCounter; + } + } else { + Register Counter; + if (LastStage0Insts.empty()) { + // use initial counter value (testing if the trip count is sufficient to + // be executed by pipelined code) + Counter = Init; + if (IsUpdatePriorComp) + Counter = + cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end()); + } else { + // MBB is an epilogue block. LastStage0Insts[Comp] is in the kernel block. + Counter = LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg(); + } + + for (int I = 0; I <= TC; ++I) { + Register NextCounter; + NextCounter = + cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end()); + AccCond = AccumulateCond(AccCond, CC); + if (I != TC && Update != Comp) + NextCounter = + cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end()); + Counter = NextCounter; + } + } + + // If AccCond == 0, the remainder is greater than TC. + BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::SUBSXri)) + .addReg(AArch64::XZR, RegState::Define | RegState::Dead) + .addReg(AccCond) + .addImm(0) + .addImm(0); + Cond.clear(); + Cond.push_back(MachineOperand::CreateImm(AArch64CC::EQ)); +} + +static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB, + Register &RegMBB, Register &RegOther) { + assert(Phi.getNumOperands() == 5); + if (Phi.getOperand(2).getMBB() == MBB) { + RegMBB = Phi.getOperand(1).getReg(); + RegOther = Phi.getOperand(3).getReg(); + } else { + assert(Phi.getOperand(4).getMBB() == MBB); + RegMBB = Phi.getOperand(3).getReg(); + RegOther = Phi.getOperand(1).getReg(); + } +} + +static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB) { + if (!Reg.isVirtual()) + return false; + const MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + return MRI.getVRegDef(Reg)->getParent() != BB; +} + +/// If Reg is an induction variable, return true and set some parameters +static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB, + MachineInstr *&UpdateInst, + unsigned &UpdateCounterOprNum, Register &InitReg, + bool &IsUpdatePriorComp) { + // Example: + // + // Preheader: + // InitReg = ... + // LoopBB: + // Reg0 = PHI (InitReg, Preheader), (Reg1, LoopBB) + // Reg = COPY Reg0 ; COPY is ignored. + // Reg1 = ADD Reg, #1; UpdateInst. Incremented by a loop invariant value. 
+ // ; Reg is the value calculated in the previous + // ; iteration, so IsUpdatePriorComp == false. + + if (LoopBB->pred_size() != 2) + return false; + if (!Reg.isVirtual()) + return false; + const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo(); + UpdateInst = nullptr; + UpdateCounterOprNum = 0; + InitReg = 0; + IsUpdatePriorComp = true; + Register CurReg = Reg; + while (true) { + MachineInstr *Def = MRI.getVRegDef(CurReg); + if (Def->getParent() != LoopBB) + return false; + if (Def->isCopy()) { + // Ignore copy instructions unless they contain subregisters + if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg()) + return false; + CurReg = Def->getOperand(1).getReg(); + } else if (Def->isPHI()) { + if (InitReg != 0) + return false; + if (!UpdateInst) + IsUpdatePriorComp = false; + extractPhiReg(*Def, LoopBB, CurReg, InitReg); + } else { + if (UpdateInst) + return false; + switch (Def->getOpcode()) { + case AArch64::ADDSXri: + case AArch64::ADDSWri: + case AArch64::SUBSXri: + case AArch64::SUBSWri: + case AArch64::ADDXri: + case AArch64::ADDWri: + case AArch64::SUBXri: + case AArch64::SUBWri: + UpdateInst = Def; + UpdateCounterOprNum = 1; + break; + case AArch64::ADDSXrr: + case AArch64::ADDSWrr: + case AArch64::SUBSXrr: + case AArch64::SUBSWrr: + case AArch64::ADDXrr: + case AArch64::ADDWrr: + case AArch64::SUBXrr: + case AArch64::SUBWrr: + UpdateInst = Def; + if (isDefinedOutside(Def->getOperand(2).getReg(), LoopBB)) + UpdateCounterOprNum = 1; + else if (isDefinedOutside(Def->getOperand(1).getReg(), LoopBB)) + UpdateCounterOprNum = 2; + else + return false; + break; + default: + return false; + } + CurReg = Def->getOperand(UpdateCounterOprNum).getReg(); + } + + if (!CurReg.isVirtual()) + return false; + if (Reg == CurReg) + break; + } + + if (!UpdateInst) + return false; + + return true; } std::unique_ptr AArch64InstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const { + // Accept loops that meet the following conditions + // * The conditional branch is BCC + // * The compare instruction is ADDS/SUBS/WHILEXX + // * One operand of the compare is an induction variable and the other is a + // loop invariant value + // * The induction variable is incremented/decremented by a single instruction + // * Does not contain CALL or instructions which have unmodeled side effects + + for (MachineInstr &MI : *LoopBB) + if (MI.isCall() || MI.hasUnmodeledSideEffects()) + // This instruction may use NZCV, which interferes with the instruction to + // be inserted for loop control. 
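+      // Calls also clobber NZCV, and unmodeled side effects cannot safely be
+      // reordered across iterations, so conservatively reject the loop.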
+ return nullptr; + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector Cond; if (analyzeBranch(*LoopBB, TBB, FBB, Cond)) @@ -9641,48 +9919,76 @@ AArch64InstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const { return nullptr; // Must be conditional branch - if (FBB == nullptr) + if (TBB != LoopBB && FBB == nullptr) return nullptr; assert((TBB == LoopBB || FBB == LoopBB) && "The Loop must be a single-basic-block loop"); + MachineInstr *CondBranch = &*LoopBB->getFirstTerminator(); + const TargetRegisterInfo &TRI = getRegisterInfo(); + + if (CondBranch->getOpcode() != AArch64::Bcc) + return nullptr; + // Normalization for createTripCountGreaterCondition() if (TBB == LoopBB) reverseBranchCondition(Cond); - MachineInstr *CondBranch = &*LoopBB->getFirstTerminator(); - const TargetRegisterInfo &TRI = getRegisterInfo(); - - // Find the immediate predecessor of the conditional branch - MachineInstr *PredBranch = nullptr; - if (CondBranch->getOpcode() == AArch64::Bcc) { - for (MachineInstr &MI : reverse(*LoopBB)) { - if (MI.modifiesRegister(AArch64::NZCV, &TRI)) { - PredBranch = &MI; + MachineInstr *Comp = nullptr; + unsigned CompCounterOprNum = 0; + for (MachineInstr &MI : reverse(*LoopBB)) { + if (MI.modifiesRegister(AArch64::NZCV, &TRI)) { + // Guarantee that the compare is SUBS/ADDS/WHILEXX and that one of the + // operands is a loop invariant value + + switch (MI.getOpcode()) { + case AArch64::SUBSXri: + case AArch64::SUBSWri: + case AArch64::ADDSXri: + case AArch64::ADDSWri: + Comp = &MI; + CompCounterOprNum = 1; break; + case AArch64::ADDSWrr: + case AArch64::ADDSXrr: + case AArch64::SUBSWrr: + case AArch64::SUBSXrr: + Comp = &MI; + break; + default: + if (isWhileOpcode(MI.getOpcode())) { + Comp = &MI; + break; + } + return nullptr; } - } - if (!PredBranch) - return nullptr; - } else if (isCompareAndBranch(CondBranch->getOpcode())) { - const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo(); - Register Reg = CondBranch->getOperand(0).getReg(); - if (!Reg.isVirtual()) - return nullptr; - PredBranch = MRI.getVRegDef(Reg); - // MachinePipeliner does not expect that the immediate predecessor is a Phi - if (PredBranch->isPHI()) - return nullptr; + if (CompCounterOprNum == 0) { + if (isDefinedOutside(Comp->getOperand(1).getReg(), LoopBB)) + CompCounterOprNum = 2; + else if (isDefinedOutside(Comp->getOperand(2).getReg(), LoopBB)) + CompCounterOprNum = 1; + else + return nullptr; + } + break; + } + } + if (!Comp) + return nullptr; - if (PredBranch->getParent() != LoopBB) - return nullptr; - } else { + MachineInstr *Update = nullptr; + Register Init; + bool IsUpdatePriorComp; + unsigned UpdateCounterOprNum; + if (!getIndVarInfo(Comp->getOperand(CompCounterOprNum).getReg(), LoopBB, + Update, UpdateCounterOprNum, Init, IsUpdatePriorComp)) return nullptr; - } - return std::make_unique(PredBranch, Cond); + return std::make_unique( + LoopBB, CondBranch, Comp, CompCounterOprNum, Update, UpdateCounterOprNum, + Init, IsUpdatePriorComp, Cond); } #define GET_INSTRINFO_HELPERS diff --git a/llvm/test/CodeGen/AArch64/sms-acceptable-loop3.mir b/llvm/test/CodeGen/AArch64/sms-acceptable-loop3.mir index 94dd299d1caa7..630a89364c8c9 100644 --- a/llvm/test/CodeGen/AArch64/sms-acceptable-loop3.mir +++ b/llvm/test/CodeGen/AArch64/sms-acceptable-loop3.mir @@ -1,8 +1,11 @@ # RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-enable-copytophi=0 -debug-only=pipeliner 2>&1 | FileCheck %s # REQUIRES: asserts -# An acceptable loop 
by pipeliner: TBB == ExitBB, FBB == LoopBB, Compare and branch -# CHECK: Schedule Found? 1 +# Compare and branch instructions are not supported now. +# CHECK: Unable to analyzeLoop, can NOT pipeline Loop + +# (if supported) An acceptable loop by pipeliner: TBB == ExitBB, FBB == LoopBB, Compare and branch +# CHECK(if supported): Schedule Found? 1 --- | define dso_local void @func(ptr noalias nocapture noundef writeonly %a, ptr nocapture noundef readonly %b, i32 noundef %n) local_unnamed_addr #0 { diff --git a/llvm/test/CodeGen/AArch64/sms-acceptable-loop4.mir b/llvm/test/CodeGen/AArch64/sms-acceptable-loop4.mir index fbd74a777aa1e..cb0b1e3028753 100644 --- a/llvm/test/CodeGen/AArch64/sms-acceptable-loop4.mir +++ b/llvm/test/CodeGen/AArch64/sms-acceptable-loop4.mir @@ -1,8 +1,11 @@ # RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-enable-copytophi=0 -debug-only=pipeliner 2>&1 | FileCheck %s # REQUIRES: asserts -# An acceptable loop by pipeliner TBB == LoopBB, FBB == ExitBB, Compare and branch -# CHECK: Schedule Found? 1 +# Compare and branch instructions are not supported now. +# CHECK: Unable to analyzeLoop, can NOT pipeline Loop + +# (if supported) An acceptable loop by pipeliner TBB == LoopBB, FBB == ExitBB, Compare and branch +# CHECK(if supported): Schedule Found? 1 --- | define dso_local void @func(ptr noalias nocapture noundef writeonly %a, ptr nocapture noundef readonly %b, i32 noundef %n) local_unnamed_addr #0 { diff --git a/llvm/test/CodeGen/AArch64/sms-mve1.mir b/llvm/test/CodeGen/AArch64/sms-mve1.mir new file mode 100644 index 0000000000000..c7f187c807ead --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sms-mve1.mir @@ -0,0 +1,144 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 +# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-mve-cg -pipeliner-force-ii=3 -mcpu=neoverse-n1 2>&1 | FileCheck %s + +# test pipeliner code genearation by MVE algorithm +# #stages: 2, unroll count: 3 + +# the calculation result can be checked as follows (driver code written by C): +# for (i=2; i&1 | FileCheck %s + +# test pipeliner code genearation by MVE algorithm +# same as sms-mve1.mir except for the order of the operands + +... 
+--- +name: func +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: func + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[FMOVDi:%[0-9]+]]:fpr64 = FMOVDi 1 + ; CHECK-NEXT: B %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr]], [[COPY]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[ADDXrr]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr1]], [[COPY]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr1:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr]], [[CSINCXr]], 1, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[ADDXrr1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr2]], [[COPY]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr2:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr1]], [[CSINCXr1]], 1, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr3:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[ADDXrr2]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr3]], [[COPY]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr3:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr2]], [[CSINCXr2]], 1, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr3]], 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.4, implicit $nzcv + ; CHECK-NEXT: B %bb.7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[FADDDrr:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[FMOVDi]], implicit $fpcr + ; CHECK-NEXT: [[ADDXrr4:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr4]], [[COPY]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr1:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr]], [[FMOVDi]], implicit $fpcr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:fpr64 = PHI [[FADDDrr5:%[0-9]+]], %bb.5, [[FMOVDi]], %bb.4 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gpr64 = PHI [[ADDXrr6:%[0-9]+]], %bb.5, [[COPY1]], %bb.4 + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:fpr64 = PHI [[FADDDrr8:%[0-9]+]], %bb.5, [[FADDDrr]], %bb.4 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:gpr64 = PHI [[ADDXrr7:%[0-9]+]], %bb.5, [[ADDXrr4]], %bb.4 + ; CHECK-NEXT: [[PHI4:%[0-9]+]]:fpr64 = PHI [[FADDDrr10:%[0-9]+]], %bb.5, [[FADDDrr1]], %bb.4 + ; CHECK-NEXT: [[FADDDrr2:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI2]], implicit $fpcr + ; CHECK-NEXT: [[ADDXrr5:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[PHI3]] + ; CHECK-NEXT: [[FADDDrr3:%[0-9]+]]:fpr64 = FADDDrr [[PHI4]], [[PHI]], implicit $fpcr + ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr5]], [[COPY]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr4:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr2]], [[PHI2]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr5]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr2]], implicit $fpcr + ; CHECK-NEXT: [[ADDXrr6]]:gpr64 = ADDXrr [[COPY1]], [[ADDXrr5]] + ; CHECK-NEXT: [[FADDDrr6:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr4]], [[PHI2]], implicit $fpcr + ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr6]], [[COPY]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr7:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr5]], [[FADDDrr2]], implicit $fpcr + ; 
CHECK-NEXT: [[FADDDrr8]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr5]], implicit $fpcr + ; CHECK-NEXT: [[ADDXrr7]]:gpr64 = ADDXrr [[COPY1]], [[ADDXrr6]] + ; CHECK-NEXT: [[FADDDrr9:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr7]], [[FADDDrr2]], implicit $fpcr + ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr7]], [[COPY]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr10]]:fpr64 = FADDDrr [[FADDDrr8]], [[FADDDrr5]], implicit $fpcr + ; CHECK-NEXT: [[CSINCXr4:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr8:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[ADDXrr7]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr8]], [[COPY]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr5:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr4]], [[CSINCXr4]], 1, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr9:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[ADDXrr8]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr9]], [[COPY]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr6:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr5]], [[CSINCXr5]], 1, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr6]], 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.5, implicit $nzcv + ; CHECK-NEXT: B %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[FADDDrr11:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr10]], [[FADDDrr5]], implicit $fpcr + ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr7]], [[COPY]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr7:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr7]], 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.7, implicit $nzcv + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI5:%[0-9]+]]:fpr64 = PHI [[FMOVDi]], %bb.3, [[FADDDrr8]], %bb.6 + ; CHECK-NEXT: [[PHI6:%[0-9]+]]:gpr64 = PHI [[COPY1]], %bb.3, [[ADDXrr7]], %bb.6 + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI7:%[0-9]+]]:gpr64 = PHI [[PHI6]], %bb.7, [[ADDXrr10:%[0-9]+]], %bb.1 + ; CHECK-NEXT: [[PHI8:%[0-9]+]]:fpr64 = PHI [[PHI5]], %bb.7, [[FADDDrr12:%[0-9]+]], %bb.1 + ; CHECK-NEXT: [[ADDXrr10]]:gpr64 = ADDXrr [[COPY1]], [[PHI7]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr10]], [[COPY]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr12]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI8]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr13:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr12]], [[PHI8]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr14:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr13]], [[PHI8]], implicit $fpcr + ; CHECK-NEXT: Bcc 1, %bb.1, implicit $nzcv + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI9:%[0-9]+]]:fpr64 = PHI [[FADDDrr12]], %bb.1, [[FADDDrr8]], %bb.6 + ; CHECK-NEXT: [[PHI10:%[0-9]+]]:fpr64 = PHI [[FADDDrr14]], %bb.1, [[FADDDrr11]], %bb.6 + ; CHECK-NEXT: [[FMULDrr:%[0-9]+]]:fpr64 = FMULDrr [[PHI9]], [[PHI10]], implicit $fpcr + ; CHECK-NEXT: $d0 = COPY [[FMULDrr]] + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + bb.0.entry: + liveins: $x0, $x1 + %10:gpr64 = COPY $x0 + %11:gpr64 = COPY $x1 + %20:fpr64 = FMOVDi 1 + + bb.1: + %12:gpr64 = PHI %11, %bb.0, %13, %bb.1 + %24:fpr64 = PHI %20, %bb.0, %21, %bb.1 + %13:gpr64 = ADDXrr %11, %12 + dead $xzr = SUBSXrr %13, %10, implicit-def $nzcv + %21:fpr64 = FADDDrr %20, %24, implicit $fpcr + %22:fpr64 = FADDDrr %21, %24, implicit 
$fpcr + %23:fpr64 = FADDDrr %22, %24, implicit $fpcr + Bcc 1, %bb.1, implicit $nzcv + B %bb.2 + + bb.2: + %25:fpr64 = FMULDrr %21, %23, implicit $fpcr + $d0 = COPY %25 + RET_ReallyLR implicit $d0 +... diff --git a/llvm/test/CodeGen/AArch64/sms-mve11.mir b/llvm/test/CodeGen/AArch64/sms-mve11.mir new file mode 100644 index 0000000000000..ab8cff858c2e6 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sms-mve11.mir @@ -0,0 +1,140 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 +# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-mve-cg -pipeliner-force-ii=3 -mcpu=neoverse-n1 2>&1 | FileCheck %s + +# test pipeliner code genearation by MVE algorithm +# counter increment/compare instruction with immediate operand + +... +--- +name: func +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: func + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1 + ; CHECK-NEXT: [[FMOVDi:%[0-9]+]]:fpr64 = FMOVDi 1 + ; CHECK-NEXT: B %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[COPY1]], 1, 0 + ; CHECK-NEXT: dead $xzr = SUBSXri [[ADDXri]], 20, 0, implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv + ; CHECK-NEXT: [[ADDXri1:%[0-9]+]]:gpr64common = ADDXri [[ADDXri]], 1, 0 + ; CHECK-NEXT: dead $xzr = SUBSXri [[ADDXri1]], 20, 0, implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr1:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr]], [[CSINCXr]], 1, implicit $nzcv + ; CHECK-NEXT: [[ADDXri2:%[0-9]+]]:gpr64common = ADDXri [[ADDXri1]], 1, 0 + ; CHECK-NEXT: dead $xzr = SUBSXri [[ADDXri2]], 20, 0, implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr2:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr1]], [[CSINCXr1]], 1, implicit $nzcv + ; CHECK-NEXT: [[ADDXri3:%[0-9]+]]:gpr64common = ADDXri [[ADDXri2]], 1, 0 + ; CHECK-NEXT: dead $xzr = SUBSXri [[ADDXri3]], 20, 0, implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr3:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr2]], [[CSINCXr2]], 1, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr3]], 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.4, implicit $nzcv + ; CHECK-NEXT: B %bb.7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[FADDDrr:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[FMOVDi]], implicit $fpcr + ; CHECK-NEXT: [[ADDXri4:%[0-9]+]]:gpr64common = ADDXri [[COPY1]], 1, 0 + ; CHECK-NEXT: dead $xzr = SUBSXri [[ADDXri4]], 20, 0, implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr1:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr]], [[FMOVDi]], implicit $fpcr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:fpr64 = PHI [[FADDDrr5:%[0-9]+]], %bb.5, [[FMOVDi]], %bb.4 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gpr64common = PHI [[ADDXri6:%[0-9]+]], %bb.5, [[COPY1]], %bb.4 + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:fpr64 = PHI [[FADDDrr8:%[0-9]+]], %bb.5, [[FADDDrr]], %bb.4 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:gpr64common = PHI [[ADDXri7:%[0-9]+]], %bb.5, [[ADDXri4]], %bb.4 + ; CHECK-NEXT: [[PHI4:%[0-9]+]]:fpr64 = PHI [[FADDDrr10:%[0-9]+]], %bb.5, 
[[FADDDrr1]], %bb.4 + ; CHECK-NEXT: [[FADDDrr2:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI2]], implicit $fpcr + ; CHECK-NEXT: [[ADDXri5:%[0-9]+]]:gpr64common = ADDXri [[PHI3]], 1, 0 + ; CHECK-NEXT: [[FADDDrr3:%[0-9]+]]:fpr64 = FADDDrr [[PHI4]], [[PHI]], implicit $fpcr + ; CHECK-NEXT: dead $xzr = SUBSXri [[ADDXri5]], 20, 0, implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr4:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr2]], [[PHI2]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr5]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr2]], implicit $fpcr + ; CHECK-NEXT: [[ADDXri6]]:gpr64common = ADDXri [[ADDXri5]], 1, 0 + ; CHECK-NEXT: [[FADDDrr6:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr4]], [[PHI2]], implicit $fpcr + ; CHECK-NEXT: dead $xzr = SUBSXri [[ADDXri6]], 20, 0, implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr7:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr5]], [[FADDDrr2]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr8]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr5]], implicit $fpcr + ; CHECK-NEXT: [[ADDXri7]]:gpr64common = ADDXri [[ADDXri6]], 1, 0 + ; CHECK-NEXT: [[FADDDrr9:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr7]], [[FADDDrr2]], implicit $fpcr + ; CHECK-NEXT: dead $xzr = SUBSXri [[ADDXri7]], 20, 0, implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr10]]:fpr64 = FADDDrr [[FADDDrr8]], [[FADDDrr5]], implicit $fpcr + ; CHECK-NEXT: [[CSINCXr4:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv + ; CHECK-NEXT: [[ADDXri8:%[0-9]+]]:gpr64common = ADDXri [[ADDXri7]], 1, 0 + ; CHECK-NEXT: dead $xzr = SUBSXri [[ADDXri8]], 20, 0, implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr5:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr4]], [[CSINCXr4]], 1, implicit $nzcv + ; CHECK-NEXT: [[ADDXri9:%[0-9]+]]:gpr64common = ADDXri [[ADDXri8]], 1, 0 + ; CHECK-NEXT: dead $xzr = SUBSXri [[ADDXri9]], 20, 0, implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr6:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr5]], [[CSINCXr5]], 1, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr6]], 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.5, implicit $nzcv + ; CHECK-NEXT: B %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[FADDDrr11:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr10]], [[FADDDrr5]], implicit $fpcr + ; CHECK-NEXT: dead $xzr = SUBSXri [[ADDXri7]], 20, 0, implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr7:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr7]], 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.7, implicit $nzcv + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI5:%[0-9]+]]:fpr64 = PHI [[FMOVDi]], %bb.3, [[FADDDrr8]], %bb.6 + ; CHECK-NEXT: [[PHI6:%[0-9]+]]:gpr64common = PHI [[COPY1]], %bb.3, [[ADDXri7]], %bb.6 + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI7:%[0-9]+]]:gpr64common = PHI [[PHI6]], %bb.7, [[ADDXri10:%[0-9]+]], %bb.1 + ; CHECK-NEXT: [[PHI8:%[0-9]+]]:fpr64 = PHI [[PHI5]], %bb.7, [[FADDDrr12:%[0-9]+]], %bb.1 + ; CHECK-NEXT: [[ADDXri10]]:gpr64common = ADDXri [[PHI7]], 1, 0 + ; CHECK-NEXT: dead $xzr = SUBSXri [[ADDXri10]], 20, 0, implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr12]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI8]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr13:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr12]], [[PHI8]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr14:%[0-9]+]]:fpr64 = FADDDrr 
[[FADDDrr13]], [[PHI8]], implicit $fpcr + ; CHECK-NEXT: Bcc 1, %bb.1, implicit $nzcv + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI9:%[0-9]+]]:fpr64 = PHI [[FADDDrr12]], %bb.1, [[FADDDrr8]], %bb.6 + ; CHECK-NEXT: [[PHI10:%[0-9]+]]:fpr64 = PHI [[FADDDrr14]], %bb.1, [[FADDDrr11]], %bb.6 + ; CHECK-NEXT: [[FMULDrr:%[0-9]+]]:fpr64 = FMULDrr [[PHI9]], [[PHI10]], implicit $fpcr + ; CHECK-NEXT: $d0 = COPY [[FMULDrr]] + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + bb.0.entry: + liveins: $x0, $x1 + %10:gpr64common = COPY $x0 + %11:gpr64common = COPY $x1 + %20:fpr64 = FMOVDi 1 + + bb.1: + %12:gpr64common = PHI %11, %bb.0, %13, %bb.1 + %24:fpr64 = PHI %20, %bb.0, %21, %bb.1 + %13:gpr64common = ADDXri %12, 1, 0 + dead $xzr = SUBSXri %13, 20, 0, implicit-def $nzcv + %21:fpr64 = FADDDrr %20, %24, implicit $fpcr + %22:fpr64 = FADDDrr %21, %24, implicit $fpcr + %23:fpr64 = FADDDrr %22, %24, implicit $fpcr + Bcc 1, %bb.1, implicit $nzcv + B %bb.2 + + bb.2: + %25:fpr64 = FMULDrr %21, %23, implicit $fpcr + $d0 = COPY %25 + RET_ReallyLR implicit $d0 +... diff --git a/llvm/test/CodeGen/AArch64/sms-mve12.mir b/llvm/test/CodeGen/AArch64/sms-mve12.mir new file mode 100644 index 0000000000000..6fce7b5fb9a8e --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sms-mve12.mir @@ -0,0 +1,142 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 +# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-mve-cg -pipeliner-force-ii=3 -mcpu=neoverse-v1 2>&1 | FileCheck %s + +# test pipeliner code genearation by MVE algorithm +# the compare instruction is whilexx + +... +--- +name: func +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: func + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1 + ; CHECK-NEXT: [[FMOVDi:%[0-9]+]]:fpr64 = FMOVDi 1 + ; CHECK-NEXT: [[CNTD_XPiI:%[0-9]+]]:gpr64common = CNTD_XPiI 31, 1, implicit $vg + ; CHECK-NEXT: B %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY1]], [[CNTD_XPiI]] + ; CHECK-NEXT: dead [[WHILELO_PXX_D:%[0-9]+]]:ppr = WHILELO_PXX_D [[ADDXrr]], [[COPY]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 4, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64common = ADDXrr [[ADDXrr]], [[CNTD_XPiI]] + ; CHECK-NEXT: dead [[WHILELO_PXX_D1:%[0-9]+]]:ppr = WHILELO_PXX_D [[ADDXrr1]], [[COPY]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr1:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr]], [[CSINCXr]], 4, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64common = ADDXrr [[ADDXrr1]], [[CNTD_XPiI]] + ; CHECK-NEXT: dead [[WHILELO_PXX_D2:%[0-9]+]]:ppr = WHILELO_PXX_D [[ADDXrr2]], [[COPY]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr2:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr1]], [[CSINCXr1]], 4, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr3:%[0-9]+]]:gpr64common = ADDXrr [[ADDXrr2]], [[CNTD_XPiI]] + ; CHECK-NEXT: dead [[WHILELO_PXX_D3:%[0-9]+]]:ppr = WHILELO_PXX_D [[ADDXrr3]], [[COPY]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr3:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr2]], [[CSINCXr2]], 4, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr3]], 0, 0, implicit-def 
$nzcv + ; CHECK-NEXT: Bcc 0, %bb.4, implicit $nzcv + ; CHECK-NEXT: B %bb.7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[FADDDrr:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[FMOVDi]], implicit $fpcr + ; CHECK-NEXT: [[ADDXrr4:%[0-9]+]]:gpr64common = ADDXrr [[COPY1]], [[CNTD_XPiI]] + ; CHECK-NEXT: dead [[WHILELO_PXX_D4:%[0-9]+]]:ppr = WHILELO_PXX_D [[ADDXrr4]], [[COPY]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr1:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr]], [[FMOVDi]], implicit $fpcr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:fpr64 = PHI [[FADDDrr5:%[0-9]+]], %bb.5, [[FMOVDi]], %bb.4 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gpr64common = PHI [[ADDXrr6:%[0-9]+]], %bb.5, [[COPY1]], %bb.4 + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:fpr64 = PHI [[FADDDrr8:%[0-9]+]], %bb.5, [[FADDDrr]], %bb.4 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:gpr64common = PHI [[ADDXrr7:%[0-9]+]], %bb.5, [[ADDXrr4]], %bb.4 + ; CHECK-NEXT: [[PHI4:%[0-9]+]]:fpr64 = PHI [[FADDDrr10:%[0-9]+]], %bb.5, [[FADDDrr1]], %bb.4 + ; CHECK-NEXT: [[FADDDrr2:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI2]], implicit $fpcr + ; CHECK-NEXT: [[ADDXrr5:%[0-9]+]]:gpr64common = ADDXrr [[PHI3]], [[CNTD_XPiI]] + ; CHECK-NEXT: [[FADDDrr3:%[0-9]+]]:fpr64 = FADDDrr [[PHI4]], [[PHI]], implicit $fpcr + ; CHECK-NEXT: dead [[WHILELO_PXX_D5:%[0-9]+]]:ppr = WHILELO_PXX_D [[ADDXrr5]], [[COPY]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr4:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr2]], [[PHI2]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr5]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr2]], implicit $fpcr + ; CHECK-NEXT: [[ADDXrr6]]:gpr64common = ADDXrr [[ADDXrr5]], [[CNTD_XPiI]] + ; CHECK-NEXT: [[FADDDrr6:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr4]], [[PHI2]], implicit $fpcr + ; CHECK-NEXT: dead [[WHILELO_PXX_D6:%[0-9]+]]:ppr = WHILELO_PXX_D [[ADDXrr6]], [[COPY]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr7:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr5]], [[FADDDrr2]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr8]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr5]], implicit $fpcr + ; CHECK-NEXT: [[ADDXrr7]]:gpr64common = ADDXrr [[ADDXrr6]], [[CNTD_XPiI]] + ; CHECK-NEXT: [[FADDDrr9:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr7]], [[FADDDrr2]], implicit $fpcr + ; CHECK-NEXT: dead [[WHILELO_PXX_D7:%[0-9]+]]:ppr = WHILELO_PXX_D [[ADDXrr7]], [[COPY]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr10]]:fpr64 = FADDDrr [[FADDDrr8]], [[FADDDrr5]], implicit $fpcr + ; CHECK-NEXT: [[CSINCXr4:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 4, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr8:%[0-9]+]]:gpr64common = ADDXrr [[ADDXrr7]], [[CNTD_XPiI]] + ; CHECK-NEXT: dead [[WHILELO_PXX_D8:%[0-9]+]]:ppr = WHILELO_PXX_D [[ADDXrr8]], [[COPY]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr5:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr4]], [[CSINCXr4]], 4, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr9:%[0-9]+]]:gpr64common = ADDXrr [[ADDXrr8]], [[CNTD_XPiI]] + ; CHECK-NEXT: dead [[WHILELO_PXX_D9:%[0-9]+]]:ppr = WHILELO_PXX_D [[ADDXrr9]], [[COPY]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr6:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr5]], [[CSINCXr5]], 4, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr6]], 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.5, implicit $nzcv + ; CHECK-NEXT: B %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: 
[[FADDDrr11:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr10]], [[FADDDrr5]], implicit $fpcr + ; CHECK-NEXT: dead [[WHILELO_PXX_D10:%[0-9]+]]:ppr = WHILELO_PXX_D [[ADDXrr7]], [[COPY]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr7:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 4, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr7]], 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.7, implicit $nzcv + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI5:%[0-9]+]]:fpr64 = PHI [[FMOVDi]], %bb.3, [[FADDDrr8]], %bb.6 + ; CHECK-NEXT: [[PHI6:%[0-9]+]]:gpr64common = PHI [[COPY1]], %bb.3, [[ADDXrr7]], %bb.6 + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI7:%[0-9]+]]:gpr64common = PHI [[PHI6]], %bb.7, [[ADDXrr10:%[0-9]+]], %bb.1 + ; CHECK-NEXT: [[PHI8:%[0-9]+]]:fpr64 = PHI [[PHI5]], %bb.7, [[FADDDrr12:%[0-9]+]], %bb.1 + ; CHECK-NEXT: [[ADDXrr10]]:gpr64common = ADDXrr [[PHI7]], [[CNTD_XPiI]] + ; CHECK-NEXT: dead [[WHILELO_PXX_D11:%[0-9]+]]:ppr = WHILELO_PXX_D [[ADDXrr10]], [[COPY]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr12]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI8]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr13:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr12]], [[PHI8]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr14:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr13]], [[PHI8]], implicit $fpcr + ; CHECK-NEXT: Bcc 4, %bb.1, implicit $nzcv + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI9:%[0-9]+]]:fpr64 = PHI [[FADDDrr12]], %bb.1, [[FADDDrr8]], %bb.6 + ; CHECK-NEXT: [[PHI10:%[0-9]+]]:fpr64 = PHI [[FADDDrr14]], %bb.1, [[FADDDrr11]], %bb.6 + ; CHECK-NEXT: [[FMULDrr:%[0-9]+]]:fpr64 = FMULDrr [[PHI9]], [[PHI10]], implicit $fpcr + ; CHECK-NEXT: $d0 = COPY [[FMULDrr]] + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + bb.0.entry: + liveins: $x0, $x1 + %10:gpr64common = COPY $x0 + %11:gpr64common = COPY $x1 + %20:fpr64 = FMOVDi 1 + %26:gpr64common = CNTD_XPiI 31, 1, implicit $vg + + bb.1: + %12:gpr64common = PHI %11, %bb.0, %13, %bb.1 + %24:fpr64 = PHI %20, %bb.0, %21, %bb.1 + %13:gpr64common = ADDXrr %12, %26 + dead %30:ppr = WHILELO_PXX_D %13, %10, implicit-def $nzcv + %21:fpr64 = FADDDrr %20, %24, implicit $fpcr + %22:fpr64 = FADDDrr %21, %24, implicit $fpcr + %23:fpr64 = FADDDrr %22, %24, implicit $fpcr + Bcc 4, %bb.1, implicit $nzcv + B %bb.2 + + bb.2: + %25:fpr64 = FMULDrr %21, %23, implicit $fpcr + $d0 = COPY %25 + RET_ReallyLR implicit $d0 +... diff --git a/llvm/test/CodeGen/AArch64/sms-mve2.mir b/llvm/test/CodeGen/AArch64/sms-mve2.mir new file mode 100644 index 0000000000000..103f374196977 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sms-mve2.mir @@ -0,0 +1,129 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 +# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-mve-cg -pipeliner-force-ii=4 -mcpu=neoverse-n1 2>&1 | FileCheck %s + +# test pipeliner code genearation by MVE algorithm +# #stages: 2, unroll count: 2 + +... 
+--- +name: func +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: func + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[FMOVDi:%[0-9]+]]:fpr64 = FMOVDi 1 + ; CHECK-NEXT: B %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr1]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr1:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr]], [[CSINCXr]], 1, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr1]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr2]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr2:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr1]], [[CSINCXr1]], 1, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr2]], 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.4, implicit $nzcv + ; CHECK-NEXT: B %bb.7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[FADDDrr:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[FMOVDi]], implicit $fpcr + ; CHECK-NEXT: [[ADDXrr3:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr3]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr1:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr]], [[FMOVDi]], implicit $fpcr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:fpr64 = PHI [[FADDDrr3:%[0-9]+]], %bb.5, [[FMOVDi]], %bb.4 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gpr64 = PHI [[ADDXrr4:%[0-9]+]], %bb.5, [[COPY1]], %bb.4 + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:fpr64 = PHI [[FADDDrr6:%[0-9]+]], %bb.5, [[FADDDrr]], %bb.4 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:gpr64 = PHI [[ADDXrr5:%[0-9]+]], %bb.5, [[ADDXrr3]], %bb.4 + ; CHECK-NEXT: [[PHI4:%[0-9]+]]:fpr64 = PHI [[FADDDrr7:%[0-9]+]], %bb.5, [[FADDDrr1]], %bb.4 + ; CHECK-NEXT: [[FADDDrr2:%[0-9]+]]:fpr64 = FADDDrr [[PHI4]], [[PHI]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr3]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI2]], implicit $fpcr + ; CHECK-NEXT: [[ADDXrr4]]:gpr64 = ADDXrr [[PHI3]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr4]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr4:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr3]], [[PHI2]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr5:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr4]], [[PHI2]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr6]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr3]], implicit $fpcr + ; CHECK-NEXT: [[ADDXrr5]]:gpr64 = ADDXrr [[ADDXrr4]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr5]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr7]]:fpr64 = FADDDrr [[FADDDrr6]], [[FADDDrr3]], implicit $fpcr + ; CHECK-NEXT: [[CSINCXr3:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr6:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr5]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr6]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr4:%[0-9]+]]:gpr64common = 
CSINCXr [[CSINCXr3]], [[CSINCXr3]], 1, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr4]], 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.5, implicit $nzcv + ; CHECK-NEXT: B %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[FADDDrr8:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr7]], [[FADDDrr3]], implicit $fpcr + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr5]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr5:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr5]], 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.7, implicit $nzcv + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI5:%[0-9]+]]:fpr64 = PHI [[FMOVDi]], %bb.3, [[FADDDrr6]], %bb.6 + ; CHECK-NEXT: [[PHI6:%[0-9]+]]:gpr64 = PHI [[COPY1]], %bb.3, [[ADDXrr5]], %bb.6 + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI7:%[0-9]+]]:gpr64 = PHI [[PHI6]], %bb.7, [[ADDXrr7:%[0-9]+]], %bb.1 + ; CHECK-NEXT: [[PHI8:%[0-9]+]]:fpr64 = PHI [[PHI5]], %bb.7, [[FADDDrr9:%[0-9]+]], %bb.1 + ; CHECK-NEXT: [[ADDXrr7]]:gpr64 = ADDXrr [[PHI7]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr7]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr9]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI8]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr10:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr9]], [[PHI8]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr11:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr10]], [[PHI8]], implicit $fpcr + ; CHECK-NEXT: Bcc 1, %bb.1, implicit $nzcv + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI9:%[0-9]+]]:fpr64 = PHI [[FADDDrr9]], %bb.1, [[FADDDrr6]], %bb.6 + ; CHECK-NEXT: [[PHI10:%[0-9]+]]:fpr64 = PHI [[FADDDrr11]], %bb.1, [[FADDDrr8]], %bb.6 + ; CHECK-NEXT: [[FMULDrr:%[0-9]+]]:fpr64 = FMULDrr [[PHI9]], [[PHI10]], implicit $fpcr + ; CHECK-NEXT: $d0 = COPY [[FMULDrr]] + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + bb.0.entry: + liveins: $x0, $x1 + %10:gpr64 = COPY $x0 + %11:gpr64 = COPY $x1 + %20:fpr64 = FMOVDi 1 + + bb.1: + %12:gpr64 = PHI %11, %bb.0, %13, %bb.1 + %24:fpr64 = PHI %20, %bb.0, %21, %bb.1 + %13:gpr64 = ADDXrr %12, %11 + dead $xzr = SUBSXrr %10, %13, implicit-def $nzcv + %21:fpr64 = FADDDrr %20, %24, implicit $fpcr + %22:fpr64 = FADDDrr %21, %24, implicit $fpcr + %23:fpr64 = FADDDrr %22, %24, implicit $fpcr + Bcc 1, %bb.1, implicit $nzcv + B %bb.2 + + bb.2: + %25:fpr64 = FMULDrr %21, %23, implicit $fpcr + $d0 = COPY %25 + RET_ReallyLR implicit $d0 +... diff --git a/llvm/test/CodeGen/AArch64/sms-mve3.mir b/llvm/test/CodeGen/AArch64/sms-mve3.mir new file mode 100644 index 0000000000000..017383b46be0c --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sms-mve3.mir @@ -0,0 +1,116 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 +# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-mve-cg -pipeliner-force-ii=3 -mcpu=neoverse-n1 2>&1 | FileCheck %s + +# test pipeliner code genearation by MVE algorithm +# #stages: 2, unroll count: 1 + +... 
+--- +name: func +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: func + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[FMOVDi:%[0-9]+]]:fpr64 = FMOVDi 1 + ; CHECK-NEXT: B %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr1]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr1:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr]], [[CSINCXr]], 1, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr1]], 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.4, implicit $nzcv + ; CHECK-NEXT: B %bb.7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[FADDDrr:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[FMOVDi]], implicit $fpcr + ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr2]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr1:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr]], [[FADDDrr]], implicit $fpcr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:fpr64 = PHI [[FADDDrr2:%[0-9]+]], %bb.5, [[FADDDrr]], %bb.4 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gpr64 = PHI [[ADDXrr3:%[0-9]+]], %bb.5, [[ADDXrr2]], %bb.4 + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:fpr64 = PHI [[FADDDrr4:%[0-9]+]], %bb.5, [[FADDDrr1]], %bb.4 + ; CHECK-NEXT: [[FADDDrr2]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI]], implicit $fpcr + ; CHECK-NEXT: [[ADDXrr3]]:gpr64 = ADDXrr [[PHI1]], [[COPY1]] + ; CHECK-NEXT: [[FADDDrr3:%[0-9]+]]:fpr64 = FADDDrr [[PHI2]], [[PHI2]], implicit $fpcr + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr3]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr4]]:fpr64 = FADDDrr [[FADDDrr2]], [[FADDDrr2]], implicit $fpcr + ; CHECK-NEXT: [[CSINCXr2:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr2]], 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.5, implicit $nzcv + ; CHECK-NEXT: B %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[FADDDrr5:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr4]], [[FADDDrr4]], implicit $fpcr + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr3]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr3:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr3]], 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.7, implicit $nzcv + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:fpr64 = PHI [[FMOVDi]], %bb.3, [[FADDDrr2]], %bb.6 + ; CHECK-NEXT: [[PHI4:%[0-9]+]]:gpr64 = PHI [[COPY1]], %bb.3, [[ADDXrr3]], %bb.6 + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: 
successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI5:%[0-9]+]]:gpr64 = PHI [[PHI4]], %bb.7, [[ADDXrr4:%[0-9]+]], %bb.1 + ; CHECK-NEXT: [[PHI6:%[0-9]+]]:fpr64 = PHI [[PHI3]], %bb.7, [[FADDDrr6:%[0-9]+]], %bb.1 + ; CHECK-NEXT: [[ADDXrr4]]:gpr64 = ADDXrr [[PHI5]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr4]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr6]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI6]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr7:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr6]], [[FADDDrr6]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr8:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr7]], [[FADDDrr7]], implicit $fpcr + ; CHECK-NEXT: Bcc 1, %bb.1, implicit $nzcv + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI7:%[0-9]+]]:fpr64 = PHI [[FADDDrr6]], %bb.1, [[FADDDrr2]], %bb.6 + ; CHECK-NEXT: [[PHI8:%[0-9]+]]:fpr64 = PHI [[FADDDrr8]], %bb.1, [[FADDDrr5]], %bb.6 + ; CHECK-NEXT: [[FMULDrr:%[0-9]+]]:fpr64 = FMULDrr [[PHI7]], [[PHI8]], implicit $fpcr + ; CHECK-NEXT: $d0 = COPY [[FMULDrr]] + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + bb.0.entry: + liveins: $x0, $x1 + %10:gpr64 = COPY $x0 + %11:gpr64 = COPY $x1 + %20:fpr64 = FMOVDi 1 + + bb.1: + %12:gpr64 = PHI %11, %bb.0, %13, %bb.1 + %24:fpr64 = PHI %20, %bb.0, %21, %bb.1 + %13:gpr64 = ADDXrr %12, %11 + dead $xzr = SUBSXrr %10, %13, implicit-def $nzcv + %21:fpr64 = FADDDrr %20, %24, implicit $fpcr + %22:fpr64 = FADDDrr %21, %21, implicit $fpcr + %23:fpr64 = FADDDrr %22, %22, implicit $fpcr + Bcc 1, %bb.1, implicit $nzcv + B %bb.2 + + bb.2: + %25:fpr64 = FMULDrr %21, %23, implicit $fpcr + $d0 = COPY %25 + RET_ReallyLR implicit $d0 +... diff --git a/llvm/test/CodeGen/AArch64/sms-mve4.mir b/llvm/test/CodeGen/AArch64/sms-mve4.mir new file mode 100644 index 0000000000000..203ce6ddfd2ec --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sms-mve4.mir @@ -0,0 +1,130 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 +# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-mve-cg -pipeliner-force-ii=3 -mcpu=neoverse-n1 2>&1 | FileCheck %s + +# test pipeliner code generation by MVE algorithm +# no dedicated exit + +... 
+--- +name: func +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: func + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[FMOVDi:%[0-9]+]]:fpr64 = FMOVDi 1 + ; CHECK-NEXT: dead [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.3, implicit $nzcv + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: B %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.8(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr1]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr1:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr]], [[CSINCXr]], 1, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr1]], 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.5, implicit $nzcv + ; CHECK-NEXT: B %bb.8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.6(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[FADDDrr:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[FMOVDi]], implicit $fpcr + ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr2]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr1:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr]], [[FADDDrr]], implicit $fpcr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.7(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:fpr64 = PHI [[FADDDrr2:%[0-9]+]], %bb.6, [[FADDDrr]], %bb.5 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gpr64 = PHI [[ADDXrr3:%[0-9]+]], %bb.6, [[ADDXrr2]], %bb.5 + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:fpr64 = PHI [[FADDDrr4:%[0-9]+]], %bb.6, [[FADDDrr1]], %bb.5 + ; CHECK-NEXT: [[FADDDrr2]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI]], implicit $fpcr + ; CHECK-NEXT: [[ADDXrr3]]:gpr64 = ADDXrr [[PHI1]], [[COPY1]] + ; CHECK-NEXT: [[FADDDrr3:%[0-9]+]]:fpr64 = FADDDrr [[PHI2]], [[PHI2]], implicit $fpcr + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr3]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr4]]:fpr64 = FADDDrr [[FADDDrr2]], [[FADDDrr2]], implicit $fpcr + ; CHECK-NEXT: [[CSINCXr2:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr2]], 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.6, implicit $nzcv + ; CHECK-NEXT: B %bb.7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.8(0x40000000), %bb.9(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[FADDDrr5:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr4]], [[FADDDrr4]], implicit $fpcr + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr3]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr3:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr3]], 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.8, implicit $nzcv + ; CHECK-NEXT: B %bb.9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; 
CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:fpr64 = PHI [[FMOVDi]], %bb.4, [[FADDDrr2]], %bb.7 + ; CHECK-NEXT: [[PHI4:%[0-9]+]]:gpr64 = PHI [[COPY1]], %bb.4, [[ADDXrr3]], %bb.7 + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.9: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI5:%[0-9]+]]:fpr64 = PHI [[FADDDrr8:%[0-9]+]], %bb.2, [[FADDDrr5]], %bb.7 + ; CHECK-NEXT: B %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.9(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI6:%[0-9]+]]:gpr64 = PHI [[PHI4]], %bb.8, [[ADDXrr4:%[0-9]+]], %bb.2 + ; CHECK-NEXT: [[PHI7:%[0-9]+]]:fpr64 = PHI [[PHI3]], %bb.8, [[FADDDrr6:%[0-9]+]], %bb.2 + ; CHECK-NEXT: [[ADDXrr4]]:gpr64 = ADDXrr [[PHI6]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr4]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr6]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI7]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr7:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr6]], [[FADDDrr6]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr8]]:fpr64 = FADDDrr [[FADDDrr7]], [[FADDDrr7]], implicit $fpcr + ; CHECK-NEXT: Bcc 1, %bb.2, implicit $nzcv + ; CHECK-NEXT: B %bb.9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: [[PHI8:%[0-9]+]]:fpr64 = PHI [[FMOVDi]], %bb.0, [[PHI5]], %bb.9 + ; CHECK-NEXT: $d0 = COPY [[PHI8]] + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + bb.0.entry: + liveins: $x0, $x1 + %10:gpr64 = COPY $x0 + %11:gpr64 = COPY $x1 + %20:fpr64 = FMOVDi 1 + dead %15:gpr64 = SUBSXrr %10, %11, implicit-def $nzcv + Bcc 0, %bb.3, implicit $nzcv + + bb.1: + + bb.2: + %12:gpr64 = PHI %11, %bb.1, %13, %bb.2 + %24:fpr64 = PHI %20, %bb.1, %21, %bb.2 + %13:gpr64 = ADDXrr %12, %11 + dead $xzr = SUBSXrr %10, %13, implicit-def $nzcv + %21:fpr64 = FADDDrr %20, %24, implicit $fpcr + %22:fpr64 = FADDDrr %21, %21, implicit $fpcr + %23:fpr64 = FADDDrr %22, %22, implicit $fpcr + Bcc 1, %bb.2, implicit $nzcv + B %bb.3 + + bb.3: + %25:fpr64 = PHI %20, %bb.0, %23, %bb.2 + $d0 = COPY %25 + RET_ReallyLR implicit $d0 +... diff --git a/llvm/test/CodeGen/AArch64/sms-mve5.mir b/llvm/test/CodeGen/AArch64/sms-mve5.mir new file mode 100644 index 0000000000000..4795df70c07a4 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sms-mve5.mir @@ -0,0 +1,140 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 +# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-mve-cg -pipeliner-force-ii=3 -mcpu=neoverse-n1 2>&1 | FileCheck %s + +# test pipeliner code genearation by MVE algorithm +# exit loop when condition holds + +... 
+--- +name: func +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: func + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[FMOVDi:%[0-9]+]]:fpr64 = FMOVDi 1 + ; CHECK-NEXT: B %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr1]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr1:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr]], [[CSINCXr]], 1, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr1]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr2]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr2:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr1]], [[CSINCXr1]], 1, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr3:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr2]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr3]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr3:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr2]], [[CSINCXr2]], 1, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr3]], 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.4, implicit $nzcv + ; CHECK-NEXT: B %bb.7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[FADDDrr:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[FMOVDi]], implicit $fpcr + ; CHECK-NEXT: [[ADDXrr4:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr4]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr1:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr]], [[FMOVDi]], implicit $fpcr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:fpr64 = PHI [[FADDDrr5:%[0-9]+]], %bb.5, [[FMOVDi]], %bb.4 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gpr64 = PHI [[ADDXrr6:%[0-9]+]], %bb.5, [[COPY1]], %bb.4 + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:fpr64 = PHI [[FADDDrr8:%[0-9]+]], %bb.5, [[FADDDrr]], %bb.4 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:gpr64 = PHI [[ADDXrr7:%[0-9]+]], %bb.5, [[ADDXrr4]], %bb.4 + ; CHECK-NEXT: [[PHI4:%[0-9]+]]:fpr64 = PHI [[FADDDrr10:%[0-9]+]], %bb.5, [[FADDDrr1]], %bb.4 + ; CHECK-NEXT: [[FADDDrr2:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI2]], implicit $fpcr + ; CHECK-NEXT: [[ADDXrr5:%[0-9]+]]:gpr64 = ADDXrr [[PHI3]], [[COPY1]] + ; CHECK-NEXT: [[FADDDrr3:%[0-9]+]]:fpr64 = FADDDrr [[PHI4]], [[PHI]], implicit $fpcr + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr5]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr4:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr2]], [[PHI2]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr5]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr2]], implicit $fpcr + ; CHECK-NEXT: [[ADDXrr6]]:gpr64 = ADDXrr [[ADDXrr5]], [[COPY1]] + ; CHECK-NEXT: [[FADDDrr6:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr4]], [[PHI2]], implicit $fpcr + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr6]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr7:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr5]], [[FADDDrr2]], implicit $fpcr + ; 
CHECK-NEXT: [[FADDDrr8]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr5]], implicit $fpcr + ; CHECK-NEXT: [[ADDXrr7]]:gpr64 = ADDXrr [[ADDXrr6]], [[COPY1]] + ; CHECK-NEXT: [[FADDDrr9:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr7]], [[FADDDrr2]], implicit $fpcr + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr7]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr10]]:fpr64 = FADDDrr [[FADDDrr8]], [[FADDDrr5]], implicit $fpcr + ; CHECK-NEXT: [[CSINCXr4:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr8:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr7]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr8]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr5:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr4]], [[CSINCXr4]], 1, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr9:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr8]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr9]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr6:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr5]], [[CSINCXr5]], 1, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr6]], 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.5, implicit $nzcv + ; CHECK-NEXT: B %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[FADDDrr11:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr10]], [[FADDDrr5]], implicit $fpcr + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr7]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr7:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr7]], 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.7, implicit $nzcv + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI5:%[0-9]+]]:fpr64 = PHI [[FMOVDi]], %bb.3, [[FADDDrr8]], %bb.6 + ; CHECK-NEXT: [[PHI6:%[0-9]+]]:gpr64 = PHI [[COPY1]], %bb.3, [[ADDXrr7]], %bb.6 + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI7:%[0-9]+]]:gpr64 = PHI [[PHI6]], %bb.7, [[ADDXrr10:%[0-9]+]], %bb.1 + ; CHECK-NEXT: [[PHI8:%[0-9]+]]:fpr64 = PHI [[PHI5]], %bb.7, [[FADDDrr12:%[0-9]+]], %bb.1 + ; CHECK-NEXT: [[ADDXrr10]]:gpr64 = ADDXrr [[PHI7]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr10]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr12]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI8]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr13:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr12]], [[PHI8]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr14:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr13]], [[PHI8]], implicit $fpcr + ; CHECK-NEXT: Bcc 0, %bb.2, implicit $nzcv + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI9:%[0-9]+]]:fpr64 = PHI [[FADDDrr12]], %bb.1, [[FADDDrr8]], %bb.6 + ; CHECK-NEXT: [[PHI10:%[0-9]+]]:fpr64 = PHI [[FADDDrr14]], %bb.1, [[FADDDrr11]], %bb.6 + ; CHECK-NEXT: [[FMULDrr:%[0-9]+]]:fpr64 = FMULDrr [[PHI9]], [[PHI10]], implicit $fpcr + ; CHECK-NEXT: $d0 = COPY [[FMULDrr]] + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + bb.0.entry: + liveins: $x0, $x1 + %10:gpr64 = COPY $x0 + %11:gpr64 = COPY $x1 + %20:fpr64 = FMOVDi 1 + + bb.1: + %12:gpr64 = PHI %11, %bb.0, %13, %bb.1 + %24:fpr64 = PHI %20, %bb.0, %21, %bb.1 + %13:gpr64 = ADDXrr %12, %11 + dead $xzr = SUBSXrr %10, %13, implicit-def $nzcv + %21:fpr64 = FADDDrr %20, %24, implicit $fpcr + %22:fpr64 = FADDDrr %21, %24, implicit 
$fpcr + %23:fpr64 = FADDDrr %22, %24, implicit $fpcr + Bcc 0, %bb.2, implicit $nzcv + B %bb.1 + + bb.2: + %25:fpr64 = FMULDrr %21, %23, implicit $fpcr + $d0 = COPY %25 + RET_ReallyLR implicit $d0 +... diff --git a/llvm/test/CodeGen/AArch64/sms-mve6.mir b/llvm/test/CodeGen/AArch64/sms-mve6.mir new file mode 100644 index 0000000000000..527e9e9d09dfc --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sms-mve6.mir @@ -0,0 +1,138 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 +# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-mve-cg -pipeliner-force-ii=3 -mcpu=neoverse-n1 2>&1 | FileCheck %s + +# test pipeliner code genearation by MVE algorithm +# #stages: 2, unroll count: 3 + +... +--- +name: func +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: func + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[FMOVDi:%[0-9]+]]:fpr64 = FMOVDi 1 + ; CHECK-NEXT: B %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr1:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr]], [[CSINCXr]], 1, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr1]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr2:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr1]], [[CSINCXr1]], 1, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr1]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr2]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr3:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr2]], [[CSINCXr2]], 1, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr3]], 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.4, implicit $nzcv + ; CHECK-NEXT: B %bb.7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[FMOVDi]], implicit $fpcr + ; CHECK-NEXT: [[ADDXrr3:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[COPY1]] + ; CHECK-NEXT: [[FADDDrr1:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr]], [[FMOVDi]], implicit $fpcr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:fpr64 = PHI [[FADDDrr5:%[0-9]+]], %bb.5, [[FMOVDi]], %bb.4 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gpr64 = PHI [[ADDXrr5:%[0-9]+]], %bb.5, [[COPY1]], %bb.4 + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:fpr64 = PHI [[FADDDrr8:%[0-9]+]], %bb.5, [[FADDDrr]], %bb.4 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:gpr64 = PHI [[ADDXrr6:%[0-9]+]], %bb.5, [[ADDXrr3]], %bb.4 + ; CHECK-NEXT: [[PHI4:%[0-9]+]]:fpr64 = PHI [[FADDDrr10:%[0-9]+]], %bb.5, [[FADDDrr1]], %bb.4 + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[PHI3]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr2:%[0-9]+]]:fpr64 = 
FADDDrr [[FMOVDi]], [[PHI2]], implicit $fpcr + ; CHECK-NEXT: [[ADDXrr4:%[0-9]+]]:gpr64 = ADDXrr [[PHI3]], [[COPY1]] + ; CHECK-NEXT: [[FADDDrr3:%[0-9]+]]:fpr64 = FADDDrr [[PHI4]], [[PHI]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr4:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr2]], [[PHI2]], implicit $fpcr + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr4]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr5]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr2]], implicit $fpcr + ; CHECK-NEXT: [[ADDXrr5]]:gpr64 = ADDXrr [[ADDXrr4]], [[COPY1]] + ; CHECK-NEXT: [[FADDDrr6:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr4]], [[PHI2]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr7:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr5]], [[FADDDrr2]], implicit $fpcr + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr5]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr8]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr5]], implicit $fpcr + ; CHECK-NEXT: [[ADDXrr6]]:gpr64 = ADDXrr [[ADDXrr5]], [[COPY1]] + ; CHECK-NEXT: [[FADDDrr9:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr7]], [[FADDDrr2]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr10]]:fpr64 = FADDDrr [[FADDDrr8]], [[FADDDrr5]], implicit $fpcr + ; CHECK-NEXT: [[CSINCXr4:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr6]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr5:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr4]], [[CSINCXr4]], 1, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr7:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr6]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr7]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr6:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr5]], [[CSINCXr5]], 1, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr6]], 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.5, implicit $nzcv + ; CHECK-NEXT: B %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[FADDDrr11:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr10]], [[FADDDrr5]], implicit $fpcr + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr5]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr7:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr7]], 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.7, implicit $nzcv + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI5:%[0-9]+]]:fpr64 = PHI [[FMOVDi]], %bb.3, [[FADDDrr8]], %bb.6 + ; CHECK-NEXT: [[PHI6:%[0-9]+]]:gpr64 = PHI [[COPY1]], %bb.3, [[ADDXrr6]], %bb.6 + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI7:%[0-9]+]]:gpr64 = PHI [[PHI6]], %bb.7, [[ADDXrr8:%[0-9]+]], %bb.1 + ; CHECK-NEXT: [[PHI8:%[0-9]+]]:fpr64 = PHI [[PHI5]], %bb.7, [[FADDDrr12:%[0-9]+]], %bb.1 + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[PHI7]], implicit-def $nzcv + ; CHECK-NEXT: [[ADDXrr8]]:gpr64 = ADDXrr [[PHI7]], [[COPY1]] + ; CHECK-NEXT: [[FADDDrr12]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI8]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr13:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr12]], [[PHI8]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr14:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr13]], [[PHI8]], implicit $fpcr + ; CHECK-NEXT: Bcc 1, %bb.1, implicit $nzcv + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI9:%[0-9]+]]:fpr64 = PHI [[FADDDrr12]], %bb.1, 
[[FADDDrr8]], %bb.6 + ; CHECK-NEXT: [[PHI10:%[0-9]+]]:fpr64 = PHI [[FADDDrr14]], %bb.1, [[FADDDrr11]], %bb.6 + ; CHECK-NEXT: [[FMULDrr:%[0-9]+]]:fpr64 = FMULDrr [[PHI9]], [[PHI10]], implicit $fpcr + ; CHECK-NEXT: $d0 = COPY [[FMULDrr]] + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + bb.0.entry: + liveins: $x0, $x1 + %10:gpr64 = COPY $x0 + %11:gpr64 = COPY $x1 + %20:fpr64 = FMOVDi 1 + + bb.1: + %12:gpr64 = PHI %11, %bb.0, %13, %bb.1 + %24:fpr64 = PHI %20, %bb.0, %21, %bb.1 + dead $xzr = SUBSXrr %10, %12, implicit-def $nzcv + %13:gpr64 = ADDXrr %12, %11 + %21:fpr64 = FADDDrr %20, %24, implicit $fpcr + %22:fpr64 = FADDDrr %21, %24, implicit $fpcr + %23:fpr64 = FADDDrr %22, %24, implicit $fpcr + Bcc 1, %bb.1, implicit $nzcv + B %bb.2 + + bb.2: + %25:fpr64 = FMULDrr %21, %23, implicit $fpcr + $d0 = COPY %25 + RET_ReallyLR implicit $d0 +... diff --git a/llvm/test/CodeGen/AArch64/sms-mve7.mir b/llvm/test/CodeGen/AArch64/sms-mve7.mir new file mode 100644 index 0000000000000..3b3d0ea09eaf0 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sms-mve7.mir @@ -0,0 +1,128 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 +# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-mve-cg -pipeliner-force-ii=3 -mcpu=neoverse-n1 2>&1 | FileCheck %s + +# test pipeliner code genearation by MVE algorithm +# compare instruction also updates the counter + +... +--- +name: func +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: func + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[FMOVDi:%[0-9]+]]:fpr64 = FMOVDi 1 + ; CHECK-NEXT: B %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv + ; CHECK-NEXT: [[SUBSXrr1:%[0-9]+]]:gpr64 = SUBSXrr [[SUBSXrr]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr1:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr]], [[CSINCXr]], 1, implicit $nzcv + ; CHECK-NEXT: [[SUBSXrr2:%[0-9]+]]:gpr64 = SUBSXrr [[SUBSXrr1]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr2:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr1]], [[CSINCXr1]], 1, implicit $nzcv + ; CHECK-NEXT: [[SUBSXrr3:%[0-9]+]]:gpr64 = SUBSXrr [[SUBSXrr2]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr3:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr2]], [[CSINCXr2]], 1, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr3]], 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.4, implicit $nzcv + ; CHECK-NEXT: B %bb.7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[FADDDrr:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[FMOVDi]], implicit $fpcr + ; CHECK-NEXT: [[SUBSXrr4:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr1:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr]], [[FMOVDi]], implicit $fpcr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:fpr64 = PHI [[FADDDrr5:%[0-9]+]], %bb.5, [[FMOVDi]], %bb.4 + ; CHECK-NEXT: 
[[PHI1:%[0-9]+]]:gpr64 = PHI [[SUBSXrr6:%[0-9]+]], %bb.5, [[COPY]], %bb.4 + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:fpr64 = PHI [[FADDDrr8:%[0-9]+]], %bb.5, [[FADDDrr]], %bb.4 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:gpr64 = PHI [[SUBSXrr7:%[0-9]+]], %bb.5, [[SUBSXrr4]], %bb.4 + ; CHECK-NEXT: [[PHI4:%[0-9]+]]:fpr64 = PHI [[FADDDrr10:%[0-9]+]], %bb.5, [[FADDDrr1]], %bb.4 + ; CHECK-NEXT: [[FADDDrr2:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI2]], implicit $fpcr + ; CHECK-NEXT: [[SUBSXrr5:%[0-9]+]]:gpr64 = SUBSXrr [[PHI3]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr3:%[0-9]+]]:fpr64 = FADDDrr [[PHI4]], [[PHI]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr4:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr2]], [[PHI2]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr5]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr2]], implicit $fpcr + ; CHECK-NEXT: [[SUBSXrr6]]:gpr64 = SUBSXrr [[SUBSXrr5]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr6:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr4]], [[PHI2]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr7:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr5]], [[FADDDrr2]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr8]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr5]], implicit $fpcr + ; CHECK-NEXT: [[SUBSXrr7]]:gpr64 = SUBSXrr [[SUBSXrr6]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr9:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr7]], [[FADDDrr2]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr10]]:fpr64 = FADDDrr [[FADDDrr8]], [[FADDDrr5]], implicit $fpcr + ; CHECK-NEXT: [[CSINCXr4:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv + ; CHECK-NEXT: [[SUBSXrr8:%[0-9]+]]:gpr64 = SUBSXrr [[SUBSXrr7]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr5:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr4]], [[CSINCXr4]], 1, implicit $nzcv + ; CHECK-NEXT: [[SUBSXrr9:%[0-9]+]]:gpr64 = SUBSXrr [[SUBSXrr8]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr6:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr5]], [[CSINCXr5]], 1, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr6]], 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.5, implicit $nzcv + ; CHECK-NEXT: B %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[FADDDrr11:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr10]], [[FADDDrr5]], implicit $fpcr + ; CHECK-NEXT: [[SUBSXrr10:%[0-9]+]]:gpr64 = SUBSXrr [[SUBSXrr6]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr7:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr7]], 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.7, implicit $nzcv + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI5:%[0-9]+]]:fpr64 = PHI [[FMOVDi]], %bb.3, [[FADDDrr8]], %bb.6 + ; CHECK-NEXT: [[PHI6:%[0-9]+]]:gpr64 = PHI [[COPY]], %bb.3, [[SUBSXrr7]], %bb.6 + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI7:%[0-9]+]]:gpr64 = PHI [[PHI6]], %bb.7, [[SUBSXrr11:%[0-9]+]], %bb.1 + ; CHECK-NEXT: [[PHI8:%[0-9]+]]:fpr64 = PHI [[PHI5]], %bb.7, [[FADDDrr12:%[0-9]+]], %bb.1 + ; CHECK-NEXT: [[SUBSXrr11]]:gpr64 = SUBSXrr [[PHI7]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr12]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI8]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr13:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr12]], [[PHI8]], implicit $fpcr + ; CHECK-NEXT: 
[[FADDDrr14:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr13]], [[PHI8]], implicit $fpcr + ; CHECK-NEXT: Bcc 1, %bb.1, implicit $nzcv + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI9:%[0-9]+]]:fpr64 = PHI [[FADDDrr12]], %bb.1, [[FADDDrr8]], %bb.6 + ; CHECK-NEXT: [[PHI10:%[0-9]+]]:fpr64 = PHI [[FADDDrr14]], %bb.1, [[FADDDrr11]], %bb.6 + ; CHECK-NEXT: [[FMULDrr:%[0-9]+]]:fpr64 = FMULDrr [[PHI9]], [[PHI10]], implicit $fpcr + ; CHECK-NEXT: $d0 = COPY [[FMULDrr]] + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + bb.0.entry: + liveins: $x0, $x1 + %10:gpr64 = COPY $x0 + %11:gpr64 = COPY $x1 + %20:fpr64 = FMOVDi 1 + + bb.1: + %12:gpr64 = PHI %10, %bb.0, %13, %bb.1 + %24:fpr64 = PHI %20, %bb.0, %21, %bb.1 + %13:gpr64 = SUBSXrr %12, %11, implicit-def $nzcv + %21:fpr64 = FADDDrr %20, %24, implicit $fpcr + %22:fpr64 = FADDDrr %21, %24, implicit $fpcr + %23:fpr64 = FADDDrr %22, %24, implicit $fpcr + Bcc 1, %bb.1, implicit $nzcv + B %bb.2 + + bb.2: + %25:fpr64 = FMULDrr %21, %23, implicit $fpcr + $d0 = COPY %25 + RET_ReallyLR implicit $d0 +... diff --git a/llvm/test/CodeGen/AArch64/sms-mve8.mir b/llvm/test/CodeGen/AArch64/sms-mve8.mir new file mode 100644 index 0000000000000..c1ea6defac1fb --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sms-mve8.mir @@ -0,0 +1,138 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 +# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-mve-cg -pipeliner-force-ii=3 -mcpu=neoverse-n1 2>&1 | FileCheck %s + +# test pipeliner code genearation by MVE algorithm +# compare instruction also updates the counter + +... +--- +name: func +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: func + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[FMOVDi:%[0-9]+]]:fpr64 = FMOVDi 1 + ; CHECK-NEXT: B %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY1]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr1:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr]], [[CSINCXr]], 1, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr1]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr2:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr1]], [[CSINCXr1]], 1, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr1]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr2]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr3:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr2]], [[CSINCXr2]], 1, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr3]], 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.4, implicit $nzcv + ; CHECK-NEXT: B %bb.7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY1]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[FMOVDi]], 
implicit $fpcr + ; CHECK-NEXT: [[ADDXrr3:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[COPY1]] + ; CHECK-NEXT: [[FADDDrr1:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr]], [[FMOVDi]], implicit $fpcr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:fpr64 = PHI [[FADDDrr5:%[0-9]+]], %bb.5, [[FMOVDi]], %bb.4 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gpr64 = PHI [[ADDXrr5:%[0-9]+]], %bb.5, [[COPY1]], %bb.4 + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:fpr64 = PHI [[FADDDrr8:%[0-9]+]], %bb.5, [[FADDDrr]], %bb.4 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:gpr64 = PHI [[ADDXrr6:%[0-9]+]], %bb.5, [[ADDXrr3]], %bb.4 + ; CHECK-NEXT: [[PHI4:%[0-9]+]]:fpr64 = PHI [[FADDDrr10:%[0-9]+]], %bb.5, [[FADDDrr1]], %bb.4 + ; CHECK-NEXT: dead $xzr = SUBSXrr [[PHI3]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr2:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI2]], implicit $fpcr + ; CHECK-NEXT: [[ADDXrr4:%[0-9]+]]:gpr64 = ADDXrr [[PHI3]], [[COPY1]] + ; CHECK-NEXT: [[FADDDrr3:%[0-9]+]]:fpr64 = FADDDrr [[PHI4]], [[PHI]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr4:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr2]], [[PHI2]], implicit $fpcr + ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr4]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr5]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr2]], implicit $fpcr + ; CHECK-NEXT: [[ADDXrr5]]:gpr64 = ADDXrr [[ADDXrr4]], [[COPY1]] + ; CHECK-NEXT: [[FADDDrr6:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr4]], [[PHI2]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr7:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr5]], [[FADDDrr2]], implicit $fpcr + ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr5]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[FADDDrr8]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr5]], implicit $fpcr + ; CHECK-NEXT: [[ADDXrr6]]:gpr64 = ADDXrr [[ADDXrr5]], [[COPY1]] + ; CHECK-NEXT: [[FADDDrr9:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr7]], [[FADDDrr2]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr10]]:fpr64 = FADDDrr [[FADDDrr8]], [[FADDDrr5]], implicit $fpcr + ; CHECK-NEXT: [[CSINCXr4:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr6]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr5:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr4]], [[CSINCXr4]], 1, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr7:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr6]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr7]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr6:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr5]], [[CSINCXr5]], 1, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr6]], 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.5, implicit $nzcv + ; CHECK-NEXT: B %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[FADDDrr11:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr10]], [[FADDDrr5]], implicit $fpcr + ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr5]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr7:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr7]], 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.7, implicit $nzcv + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI5:%[0-9]+]]:fpr64 = PHI [[FMOVDi]], %bb.3, [[FADDDrr8]], %bb.6 + ; CHECK-NEXT: [[PHI6:%[0-9]+]]:gpr64 = PHI [[COPY1]], %bb.3, [[ADDXrr6]], %bb.6 + ; CHECK-NEXT: B %bb.1 + ; 
CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI7:%[0-9]+]]:gpr64 = PHI [[PHI6]], %bb.7, [[ADDXrr8:%[0-9]+]], %bb.1 + ; CHECK-NEXT: [[PHI8:%[0-9]+]]:fpr64 = PHI [[PHI5]], %bb.7, [[FADDDrr12:%[0-9]+]], %bb.1 + ; CHECK-NEXT: dead $xzr = SUBSXrr [[PHI7]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[ADDXrr8]]:gpr64 = ADDXrr [[PHI7]], [[COPY1]] + ; CHECK-NEXT: [[FADDDrr12]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI8]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr13:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr12]], [[PHI8]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr14:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr13]], [[PHI8]], implicit $fpcr + ; CHECK-NEXT: Bcc 1, %bb.1, implicit $nzcv + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI9:%[0-9]+]]:fpr64 = PHI [[FADDDrr12]], %bb.1, [[FADDDrr8]], %bb.6 + ; CHECK-NEXT: [[PHI10:%[0-9]+]]:fpr64 = PHI [[FADDDrr14]], %bb.1, [[FADDDrr11]], %bb.6 + ; CHECK-NEXT: [[FMULDrr:%[0-9]+]]:fpr64 = FMULDrr [[PHI9]], [[PHI10]], implicit $fpcr + ; CHECK-NEXT: $d0 = COPY [[FMULDrr]] + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + bb.0.entry: + liveins: $x0, $x1 + %10:gpr64 = COPY $x0 + %11:gpr64 = COPY $x1 + %20:fpr64 = FMOVDi 1 + + bb.1: + %12:gpr64 = PHI %11, %bb.0, %13, %bb.1 + %24:fpr64 = PHI %20, %bb.0, %21, %bb.1 + dead $xzr = SUBSXrr %12, %11, implicit-def $nzcv + %13:gpr64 = ADDXrr %12, %11 + %21:fpr64 = FADDDrr %20, %24, implicit $fpcr + %22:fpr64 = FADDDrr %21, %24, implicit $fpcr + %23:fpr64 = FADDDrr %22, %24, implicit $fpcr + Bcc 1, %bb.1, implicit $nzcv + B %bb.2 + + bb.2: + %25:fpr64 = FMULDrr %21, %23, implicit $fpcr + $d0 = COPY %25 + RET_ReallyLR implicit $d0 +... diff --git a/llvm/test/CodeGen/AArch64/sms-mve9.mir b/llvm/test/CodeGen/AArch64/sms-mve9.mir new file mode 100644 index 0000000000000..469a9ecfff8ee --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sms-mve9.mir @@ -0,0 +1,152 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 +# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-mve-cg -pipeliner-force-ii=3 -mcpu=neoverse-n1 2>&1 | FileCheck %s + +# test pipeliner code generation by MVE algorithm +# COPY instructions exist + +...
+--- +name: func +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: func + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1 + ; CHECK-NEXT: [[FMOVDi:%[0-9]+]]:fpr64 = FMOVDi 1 + ; CHECK-NEXT: B %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY1]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr1:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr]], [[CSINCXr]], 1, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr1]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr2:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr1]], [[CSINCXr1]], 1, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr1]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr2]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr3:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr2]], [[CSINCXr2]], 1, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr3]], 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.4, implicit $nzcv + ; CHECK-NEXT: B %bb.7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY [[COPY1]] + ; CHECK-NEXT: [[FADDDrr:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[FMOVDi]], implicit $fpcr + ; CHECK-NEXT: [[ADDXrr3:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY2]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64sp = COPY [[ADDXrr3]] + ; CHECK-NEXT: [[FADDDrr1:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr]], [[FMOVDi]], implicit $fpcr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:fpr64 = PHI [[FADDDrr5:%[0-9]+]], %bb.5, [[FMOVDi]], %bb.4 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gpr64sp = PHI [[COPY7:%[0-9]+]], %bb.5, [[COPY1]], %bb.4 + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:gpr64 = PHI [[COPY8:%[0-9]+]], %bb.5, [[COPY2]], %bb.4 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:fpr64 = PHI [[FADDDrr8:%[0-9]+]], %bb.5, [[FADDDrr]], %bb.4 + ; CHECK-NEXT: [[PHI4:%[0-9]+]]:gpr64 = PHI [[ADDXrr6:%[0-9]+]], %bb.5, [[ADDXrr3]], %bb.4 + ; CHECK-NEXT: [[PHI5:%[0-9]+]]:gpr64sp = PHI [[COPY9:%[0-9]+]], %bb.5, [[COPY3]], %bb.4 + ; CHECK-NEXT: [[PHI6:%[0-9]+]]:fpr64 = PHI [[FADDDrr10:%[0-9]+]], %bb.5, [[FADDDrr1]], %bb.4 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64 = COPY [[PHI5]] + ; CHECK-NEXT: [[FADDDrr2:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI3]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr3:%[0-9]+]]:fpr64 = FADDDrr [[PHI6]], [[PHI]], implicit $fpcr + ; CHECK-NEXT: [[ADDXrr4:%[0-9]+]]:gpr64 = ADDXrr [[COPY4]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY4]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr64sp = COPY [[ADDXrr4]] + ; CHECK-NEXT: [[FADDDrr4:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr2]], [[PHI3]], implicit $fpcr + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gpr64 = COPY [[COPY5]] + ; CHECK-NEXT: 
[[FADDDrr5]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr2]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr6:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr4]], [[PHI3]], implicit $fpcr + ; CHECK-NEXT: [[ADDXrr5:%[0-9]+]]:gpr64 = ADDXrr [[COPY6]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY6]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[COPY7]]:gpr64sp = COPY [[ADDXrr5]] + ; CHECK-NEXT: [[FADDDrr7:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr5]], [[FADDDrr2]], implicit $fpcr + ; CHECK-NEXT: [[COPY8]]:gpr64 = COPY [[COPY7]] + ; CHECK-NEXT: [[FADDDrr8]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr5]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr9:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr7]], [[FADDDrr2]], implicit $fpcr + ; CHECK-NEXT: [[ADDXrr6]]:gpr64 = ADDXrr [[COPY8]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY8]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[COPY9]]:gpr64sp = COPY [[ADDXrr6]] + ; CHECK-NEXT: [[FADDDrr10]]:fpr64 = FADDDrr [[FADDDrr8]], [[FADDDrr5]], implicit $fpcr + ; CHECK-NEXT: [[CSINCXr4:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr6]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr5:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr4]], [[CSINCXr4]], 1, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr7:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr6]], [[COPY1]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr7]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr6:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr5]], [[CSINCXr5]], 1, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr6]], 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.5, implicit $nzcv + ; CHECK-NEXT: B %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[FADDDrr11:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr10]], [[FADDDrr5]], implicit $fpcr + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY8]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCXr7:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv + ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr7]], 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.7, implicit $nzcv + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI7:%[0-9]+]]:fpr64 = PHI [[FMOVDi]], %bb.3, [[FADDDrr8]], %bb.6 + ; CHECK-NEXT: [[PHI8:%[0-9]+]]:gpr64common = PHI [[COPY1]], %bb.3, [[COPY9]], %bb.6 + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI9:%[0-9]+]]:gpr64sp = PHI [[PHI8]], %bb.7, [[COPY11:%[0-9]+]], %bb.1 + ; CHECK-NEXT: [[PHI10:%[0-9]+]]:fpr64 = PHI [[PHI7]], %bb.7, [[FADDDrr12:%[0-9]+]], %bb.1 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:gpr64 = COPY [[PHI9]] + ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY10]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[ADDXrr8:%[0-9]+]]:gpr64 = ADDXrr [[COPY10]], [[COPY1]] + ; CHECK-NEXT: [[COPY11]]:gpr64sp = COPY [[ADDXrr8]] + ; CHECK-NEXT: [[FADDDrr12]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI10]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr13:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr12]], [[PHI10]], implicit $fpcr + ; CHECK-NEXT: [[FADDDrr14:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr13]], [[PHI10]], implicit $fpcr + ; CHECK-NEXT: Bcc 1, %bb.1, implicit $nzcv + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI11:%[0-9]+]]:fpr64 = PHI [[FADDDrr12]], %bb.1, [[FADDDrr8]], %bb.6
+ ; CHECK-NEXT: [[PHI12:%[0-9]+]]:fpr64 = PHI [[FADDDrr14]], %bb.1, [[FADDDrr11]], %bb.6 + ; CHECK-NEXT: [[FMULDrr:%[0-9]+]]:fpr64 = FMULDrr [[PHI11]], [[PHI12]], implicit $fpcr + ; CHECK-NEXT: $d0 = COPY [[FMULDrr]] + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + bb.0.entry: + liveins: $x0, $x1 + %10:gpr64 = COPY $x0 + %11:gpr64 = COPY $x1 + %20:fpr64 = FMOVDi 1 + + bb.1: + %12:gpr64sp = PHI %11, %bb.0, %15, %bb.1 + %24:fpr64 = PHI %20, %bb.0, %21, %bb.1 + %14:gpr64 = COPY %12 + dead $xzr = SUBSXrr %14, %11, implicit-def $nzcv + %13:gpr64 = ADDXrr %14, %11 + %15:gpr64sp = COPY %13 + %21:fpr64 = FADDDrr %20, %24, implicit $fpcr + %22:fpr64 = FADDDrr %21, %24, implicit $fpcr + %23:fpr64 = FADDDrr %22, %24, implicit $fpcr + Bcc 1, %bb.1, implicit $nzcv + B %bb.2 + + bb.2: + %25:fpr64 = FMULDrr %21, %23, implicit $fpcr + $d0 = COPY %25 + RET_ReallyLR implicit $d0 +... diff --git a/llvm/test/CodeGen/AArch64/sms-unacceptable-loop3.mir b/llvm/test/CodeGen/AArch64/sms-unacceptable-loop3.mir new file mode 100644 index 0000000000000..e6d86859a41b9 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sms-unacceptable-loop3.mir @@ -0,0 +1,109 @@ +# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -debug-only=pipeliner 2>&1 | FileCheck %s +# REQUIRES: asserts + +# unacceptable loops by pipeliner + +... +--- +name: func1 +tracksRegLiveness: true +body: | + ; multiple counter increment instructions + ; CHECK: Unable to analyzeLoop, can NOT pipeline Loop + bb.0.entry: + liveins: $x0, $x1 + %10:gpr64 = COPY $x0 + %11:gpr64 = COPY $x1 + + bb.1: + %12:gpr64 = PHI %11, %bb.0, %13, %bb.1 + %13:gpr64 = ADDXrr %12, %11 + %14:gpr64 = ADDXrr %13, %11 + dead $xzr = SUBSXrr %10, %14, implicit-def $nzcv + Bcc 1, %bb.1, implicit $nzcv + B %bb.2 + + bb.2: +... +--- +name: func2 +tracksRegLiveness: true +body: | + ; neither operand of the increment instruction is a loop invariant value + ; CHECK: Unable to analyzeLoop, can NOT pipeline Loop + bb.0.entry: + liveins: $x0, $x1 + %10:gpr64 = COPY $x0 + %11:gpr64 = COPY $x1 + + bb.1: + %12:gpr64 = PHI %11, %bb.0, %13, %bb.1 + %13:gpr64 = ADDXrr %12, %12 + dead $xzr = SUBSXrr %10, %13, implicit-def $nzcv + Bcc 1, %bb.1, implicit $nzcv + B %bb.2 + + bb.2: +... +--- +name: func3 +tracksRegLiveness: true +body: | + ; neither operand of the compare instruction is a loop invariant value + ; CHECK: Unable to analyzeLoop, can NOT pipeline Loop + bb.0.entry: + liveins: $x0, $x1 + %10:gpr64 = COPY $x0 + %11:gpr64 = COPY $x1 + + bb.1: + %12:gpr64 = PHI %11, %bb.0, %13, %bb.1 + %13:gpr64 = ADDXrr %12, %11 + dead $xzr = SUBSXrr %13, %13, implicit-def $nzcv + Bcc 1, %bb.1, implicit $nzcv + B %bb.2 + + bb.2: +... +--- +name: func4 +tracksRegLiveness: true +body: | + ; multiple phi instructions + ; CHECK: Unable to analyzeLoop, can NOT pipeline Loop + bb.0.entry: + liveins: $x0, $x1 + %10:gpr64 = COPY $x0 + %11:gpr64 = COPY $x1 + + bb.1: + %12:gpr64 = PHI %11, %bb.0, %13, %bb.1 + %14:gpr64 = PHI %11, %bb.0, %15, %bb.1 + dead $xzr = SUBSXrr %12, %10, implicit-def $nzcv + %13:gpr64 = ADDXrr %14, %11 + %15:gpr64 = ADDXrr %12, %11 + Bcc 1, %bb.1, implicit $nzcv + B %bb.2 + + bb.2: +... 
+--- +name: func5 +tracksRegLiveness: true +body: | + ; not an increment instruction + ; CHECK: Unable to analyzeLoop, can NOT pipeline Loop + bb.0.entry: + liveins: $x0, $x1 + %10:gpr64 = COPY $x0 + %11:gpr64 = COPY $x1 + + bb.1: + %12:gpr64 = PHI %11, %bb.0, %13, %bb.1 + %13:gpr64 = ORRXrr %12, %12 + dead $xzr = SUBSXrr %12, %10, implicit-def $nzcv + Bcc 1, %bb.1, implicit $nzcv + B %bb.2 + + bb.2: +... diff --git a/llvm/test/CodeGen/AArch64/sms-unpipeline-insts2.mir b/llvm/test/CodeGen/AArch64/sms-unpipeline-insts2.mir index c5b76d88ff00d..290f7027244f2 100644 --- a/llvm/test/CodeGen/AArch64/sms-unpipeline-insts2.mir +++ b/llvm/test/CodeGen/AArch64/sms-unpipeline-insts2.mir @@ -1,9 +1,12 @@ # RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-enable-copytophi=0 -debug-only=pipeliner 2>&1 | FileCheck %s # REQUIRES: asserts -# An acceptable loop by pipeliner TBB == LoopBB, FBB == ExitBB, Compare and branch -# CHECK: SU([[SU0:[0-9]+]]): [[V0:%[0-9]+]]:gpr64common = SUBXri [[V1:%[0-9]+]]:gpr64common, 1, 0 -# CHECK: Do not pipeline SU([[SU0:[0-9]+]]) +# Compare and branch instructions are not supported now. +# CHECK: Unable to analyzeLoop, can NOT pipeline Loop + +# (if supported) Check that instructions for loop control are not pipelined when compare and branch is used. +# CHECK(if supported): SU([[SU0:[0-9]+]]): [[V0:%[0-9]+]]:gpr64common = SUBXri [[V1:%[0-9]+]]:gpr64common, 1, 0 +# CHECK(if supported): Do not pipeline SU([[SU0:[0-9]+]]) --- | define dso_local void @func(ptr noalias nocapture noundef writeonly %a, ptr nocapture noundef readonly %b, i32 noundef %n) local_unnamed_addr #0 {