diff --git a/llvm/include/llvm/CodeGen/MachinePipeliner.h b/llvm/include/llvm/CodeGen/MachinePipeliner.h index 8e47d0cead757..f95a02aad4559 100644 --- a/llvm/include/llvm/CodeGen/MachinePipeliner.h +++ b/llvm/include/llvm/CodeGen/MachinePipeliner.h @@ -390,6 +390,9 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs { const SwingSchedulerDDG *getDDG() const { return DDG.get(); } + bool mayOverlapInLaterIter(const MachineInstr *BaseMI, + const MachineInstr *OtherMI) const; + private: void addLoopCarriedDependences(AAResults *AA); void updatePhiDependences(); @@ -409,7 +412,7 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs { void computeNodeOrder(NodeSetType &NodeSets); void checkValidNodeOrder(const NodeSetType &Circuits) const; bool schedulePipeline(SMSchedule &Schedule); - bool computeDelta(MachineInstr &MI, unsigned &Delta) const; + bool computeDelta(const MachineInstr &MI, int &Delta) const; MachineInstr *findDefInLoop(Register Reg); bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos, unsigned &OffsetPos, unsigned &NewBase, diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index acd42aa497c6f..286ea4d87bf3c 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -2523,9 +2523,104 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) { return scheduleFound && Schedule.getMaxStageCount() > 0; } +static Register findUniqueOperandDefinedInLoop(const MachineInstr &MI) { + const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + Register Result; + for (const MachineOperand &Use : MI.all_uses()) { + Register Reg = Use.getReg(); + if (!Reg.isVirtual()) + return Register(); + if (MRI.getVRegDef(Reg)->getParent() != MI.getParent()) + continue; + if (Result) + return Register(); + Result = Reg; + } + return Result; +} + +/// When Op is a value that is incremented recursively in a loop and there is a +/// unique instruction that increments 
it, returns true and sets Value. +static bool findLoopIncrementValue(const MachineOperand &Op, int &Value) { + if (!Op.isReg() || !Op.getReg().isVirtual()) + return false; + + Register OrgReg = Op.getReg(); + Register CurReg = OrgReg; + const MachineBasicBlock *LoopBB = Op.getParent()->getParent(); + const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo(); + + const TargetInstrInfo *TII = + LoopBB->getParent()->getSubtarget().getInstrInfo(); + const TargetRegisterInfo *TRI = + LoopBB->getParent()->getSubtarget().getRegisterInfo(); + + MachineInstr *Phi = nullptr; + MachineInstr *Increment = nullptr; + + // Traverse definitions until it reaches Op or an instruction that does not + // satisfy the condition. + // Acceptable example: + // bb.0: + // %0 = PHI %3, %bb.0, ... + // %2 = ADD %0, Value + // ... = LOAD %2(Op) + // %3 = COPY %2 + while (true) { + if (!CurReg.isValid() || !CurReg.isVirtual()) + return false; + MachineInstr *Def = MRI.getVRegDef(CurReg); + if (Def->getParent() != LoopBB) + return false; + + if (Def->isCopy()) { + // Ignore copy instructions unless they contain subregisters + if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg()) + return false; + CurReg = Def->getOperand(1).getReg(); + } else if (Def->isPHI()) { + // There must be just one Phi + if (Phi) + return false; + Phi = Def; + CurReg = getLoopPhiReg(*Def, LoopBB); + } else if (TII->getIncrementValue(*Def, Value)) { + // Potentially a unique increment + if (Increment) + // Multiple increments exist + return false; + + const MachineOperand *BaseOp; + int64_t Offset; + bool OffsetIsScalable; + if (TII->getMemOperandWithOffset(*Def, BaseOp, Offset, OffsetIsScalable, + TRI)) { + // Pre/post increment instruction + CurReg = BaseOp->getReg(); + } else { + // If only one of the operands is defined within the loop, it is assumed + // to be an incremented value. 
+ CurReg = findUniqueOperandDefinedInLoop(*Def); + if (!CurReg.isValid()) + return false; + } + Increment = Def; + } else { + return false; + } + if (CurReg == OrgReg) + break; + } + + if (!Phi || !Increment) + return false; + + return true; +} + /// Return true if we can compute the amount the instruction changes /// during each iteration. Set Delta to the amount of the change. -bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) const { +bool SwingSchedulerDAG::computeDelta(const MachineInstr &MI, int &Delta) const { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); const MachineOperand *BaseOp; int64_t Offset; @@ -2540,24 +2635,7 @@ bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) const { if (!BaseOp->isReg()) return false; - Register BaseReg = BaseOp->getReg(); - - MachineRegisterInfo &MRI = MF.getRegInfo(); - // Check if there is a Phi. If so, get the definition in the loop. - MachineInstr *BaseDef = MRI.getVRegDef(BaseReg); - if (BaseDef && BaseDef->isPHI()) { - BaseReg = getLoopPhiReg(*BaseDef, MI.getParent()); - BaseDef = MRI.getVRegDef(BaseReg); - } - if (!BaseDef) - return false; - - int D = 0; - if (!TII->getIncrementValue(*BaseDef, D) && D >= 0) - return false; - - Delta = D; - return true; + return findLoopIncrementValue(*BaseOp, Delta); } /// Check if we can change the instruction to use an offset value from the @@ -2675,6 +2753,100 @@ MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) { return Def; } +/// Return false if there is no overlap between the region accessed by BaseMI in +/// an iteration and the region accessed by OtherMI in subsequent iterations. 
+bool SwingSchedulerDAG::mayOverlapInLaterIter( + const MachineInstr *BaseMI, const MachineInstr *OtherMI) const { + int DeltaB, DeltaO, Delta; + if (!computeDelta(*BaseMI, DeltaB) || !computeDelta(*OtherMI, DeltaO) || + DeltaB != DeltaO) + return true; + Delta = DeltaB; + + const MachineOperand *BaseOpB, *BaseOpO; + int64_t OffsetB, OffsetO; + bool OffsetBIsScalable, OffsetOIsScalable; + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + if (!TII->getMemOperandWithOffset(*BaseMI, BaseOpB, OffsetB, + OffsetBIsScalable, TRI) || + !TII->getMemOperandWithOffset(*OtherMI, BaseOpO, OffsetO, + OffsetOIsScalable, TRI)) + return true; + + if (OffsetBIsScalable || OffsetOIsScalable) + return true; + + if (!BaseOpB->isIdenticalTo(*BaseOpO)) { + // Pass cases with different base operands but same initial values. + // Typically for when pre/post increment is used. + + if (!BaseOpB->isReg() || !BaseOpO->isReg()) + return true; + Register RegB = BaseOpB->getReg(), RegO = BaseOpO->getReg(); + if (!RegB.isVirtual() || !RegO.isVirtual()) + return true; + + MachineInstr *DefB = MRI.getVRegDef(BaseOpB->getReg()); + MachineInstr *DefO = MRI.getVRegDef(BaseOpO->getReg()); + if (!DefB || !DefO || !DefB->isPHI() || !DefO->isPHI()) + return true; + + unsigned InitValB = 0; + unsigned LoopValB = 0; + unsigned InitValO = 0; + unsigned LoopValO = 0; + getPhiRegs(*DefB, BB, InitValB, LoopValB); + getPhiRegs(*DefO, BB, InitValO, LoopValO); + MachineInstr *InitDefB = MRI.getVRegDef(InitValB); + MachineInstr *InitDefO = MRI.getVRegDef(InitValO); + + if (!InitDefB->isIdenticalTo(*InitDefO)) + return true; + } + + LocationSize AccessSizeB = (*BaseMI->memoperands_begin())->getSize(); + LocationSize AccessSizeO = (*OtherMI->memoperands_begin())->getSize(); + + // This is the main test, which checks the offset values and the loop + // increment value to determine if the accesses may be loop carried. 
+ if (!AccessSizeB.hasValue() || !AccessSizeO.hasValue()) + return true; + + LLVM_DEBUG({ + dbgs() << "Overlap check:\n"; + dbgs() << " BaseMI: "; + BaseMI->dump(); + dbgs() << " Base + " << OffsetB << " + I * " << Delta + << ", Len: " << AccessSizeB.getValue() << "\n"; + dbgs() << " OtherMI: "; + OtherMI->dump(); + dbgs() << " Base + " << OffsetO << " + I * " << Delta + << ", Len: " << AccessSizeO.getValue() << "\n"; + }); + + // Excessive overlap may be detected in strided patterns. + // For example, the memory addresses of the store and the load in + // for (i=0; i<n; i+=2) a[i+1] = a[i]; + // are assumed to overlap. + if (Delta < 0) { + int64_t BaseMinAddr = OffsetB; + int64_t OtherNextIterMaxAddr = OffsetO + Delta + AccessSizeO.getValue() - 1; + if (BaseMinAddr > OtherNextIterMaxAddr) { + LLVM_DEBUG(dbgs() << " Result: No overlap\n"); + return false; + } + } else { + int64_t BaseMaxAddr = OffsetB + AccessSizeB.getValue() - 1; + int64_t OtherNextIterMinAddr = OffsetO + Delta; + if (BaseMaxAddr < OtherNextIterMinAddr) { + LLVM_DEBUG(dbgs() << " Result: No overlap\n"); + return false; + } + } + LLVM_DEBUG(dbgs() << " Result: Overlap\n"); + return true; +} + /// Return true for an order or output dependence that is loop carried /// potentially. A dependence is loop carried if the destination defines a value /// that may be used or defined by the source in a subsequent iteration. @@ -2706,61 +2878,7 @@ bool SwingSchedulerDAG::isLoopCarriedDep( // The conservative assumption is that a dependence between memory operations // may be loop carried. The following code checks when it can be proved that // there is no loop carried dependence. 
- unsigned DeltaS, DeltaD; - if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD)) - return true; - - const MachineOperand *BaseOpS, *BaseOpD; - int64_t OffsetS, OffsetD; - bool OffsetSIsScalable, OffsetDIsScalable; - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, OffsetSIsScalable, - TRI) || - !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, OffsetDIsScalable, - TRI)) - return true; - - assert(!OffsetSIsScalable && !OffsetDIsScalable && - "Expected offsets to be byte offsets"); - - MachineInstr *DefS = MRI.getVRegDef(BaseOpS->getReg()); - MachineInstr *DefD = MRI.getVRegDef(BaseOpD->getReg()); - if (!DefS || !DefD || !DefS->isPHI() || !DefD->isPHI()) - return true; - - unsigned InitValS = 0; - unsigned LoopValS = 0; - unsigned InitValD = 0; - unsigned LoopValD = 0; - getPhiRegs(*DefS, BB, InitValS, LoopValS); - getPhiRegs(*DefD, BB, InitValD, LoopValD); - MachineInstr *InitDefS = MRI.getVRegDef(InitValS); - MachineInstr *InitDefD = MRI.getVRegDef(InitValD); - - if (!InitDefS->isIdenticalTo(*InitDefD)) - return true; - - // Check that the base register is incremented by a constant value for each - // iteration. - MachineInstr *LoopDefS = MRI.getVRegDef(LoopValS); - int D = 0; - if (!LoopDefS || !TII->getIncrementValue(*LoopDefS, D)) - return true; - - LocationSize AccessSizeS = (*SI->memoperands_begin())->getSize(); - LocationSize AccessSizeD = (*DI->memoperands_begin())->getSize(); - - // This is the main test, which checks the offset values and the loop - // increment value to determine if the accesses may be loop carried. 
- if (!AccessSizeS.hasValue() || !AccessSizeD.hasValue()) - return true; - - if (DeltaS != DeltaD || DeltaS < AccessSizeS.getValue() || - DeltaD < AccessSizeD.getValue()) - return true; - - return (OffsetS + (int64_t)AccessSizeS.getValue() < - OffsetD + (int64_t)AccessSizeD.getValue()); + return mayOverlapInLaterIter(DI, SI); } void SwingSchedulerDAG::postProcessDAG() { diff --git a/llvm/test/CodeGen/Hexagon/swp-carried-dep1.mir b/llvm/test/CodeGen/Hexagon/swp-carried-dep1.mir index c333f1b7f31df..afc989cbc6921 100644 --- a/llvm/test/CodeGen/Hexagon/swp-carried-dep1.mir +++ b/llvm/test/CodeGen/Hexagon/swp-carried-dep1.mir @@ -3,6 +3,13 @@ # Test that the loop carried dependence check correctly identifies a recurrence. +# CHECK: Overlap check: +# CHECK-NEXT: BaseMI: S2_storerh_io %{{[0-9]+}}:intregs, 0, %{{[0-9]+}}:intregs :: (store (s16) into %ir.lsr.iv24) +# CHECK-NEXT: Base + 0 + I * 4, Len: 2 +# CHECK-NEXT: OtherMI: %{{[0-9]+}}:intregs = L2_loadrh_io %{{[0-9]+}}:intregs, -8 :: (load (s16) from %ir.cgep10) +# CHECK-NEXT: Base + -8 + I * 4, Len: 2 +# CHECK-NEXT: Result: Overlap + # CHECK: Rec NodeSet # CHECK: Rec NodeSet # CHECK: Rec NodeSet diff --git a/llvm/test/CodeGen/Hexagon/swp-carried-dep2.mir b/llvm/test/CodeGen/Hexagon/swp-carried-dep2.mir index 16ff5999e29ca..e16334ba7978f 100644 --- a/llvm/test/CodeGen/Hexagon/swp-carried-dep2.mir +++ b/llvm/test/CodeGen/Hexagon/swp-carried-dep2.mir @@ -1,14 +1,26 @@ # RUN: llc -mtriple=hexagon -run-pass pipeliner -debug-only=pipeliner %s -o /dev/null 2>&1 -pipeliner-experimental-cg=true | FileCheck %s # REQUIRES: asserts -# Test that the loop carried dependence check correctly identifies a recurrence +# Test that the loop carried dependence check correctly identifies dependences # when the loop variable decreases and the array index offset is negative. -# CHECK: Rec NodeSet -# CHECK: Rec NodeSet -# CHECK: SU(3) -# CHECK: SU(4) -# CHECK: SU(5) +# No dependence from the store to the load. 
+# CHECK: Overlap check: +# CHECK-NEXT: BaseMI: S2_storeri_io %{{[0-9]+}}:intregs, 0, %{{[0-9]+}}:intregs :: (store (s32) into %ir.lsr.iv1) +# CHECK-NEXT: Base + 0 + I * -4, Len: 4 +# CHECK-NEXT: OtherMI: %{{[0-9]+}}:intregs = L2_loadri_io %{{[0-9]+}}:intregs, -8 :: (load (s32) from %ir.cgep) +# CHECK-NEXT: Base + -8 + I * -4, Len: 4 +# CHECK-NEXT: Result: No overlap + +# TODO: There is a loop carried dependence from the load to the store but it +# is not recognised. addLoopCarriedDependences() should be modified to +# recognise the dependence and enable the following checks. +# CHECK-AFTER-FIX: Overlap check: +# CHECK-AFTER-FIX-NEXT: BaseMI: %{{[0-9]+}}:intregs = L2_loadri_io %{{[0-9]+}}:intregs, -8 :: (load (s32) from %ir.cgep) +# CHECK-AFTER-FIX-NEXT: Base + -8 + I * -4, Len: 4 +# CHECK-AFTER-FIX-NEXT: OtherMI: S2_storeri_io %{{[0-9]+}}:intregs, 0, %{{[0-9]+}}:intregs :: (store (s32) into %ir.lsr.iv1) +# CHECK-AFTER-FIX-NEXT: Base + 0 + I * -4, Len: 4 +# CHECK-AFTER-FIX-NEXT: Result: Overlap --- | diff --git a/llvm/test/CodeGen/Hexagon/swp-carried-dep3.mir b/llvm/test/CodeGen/Hexagon/swp-carried-dep3.mir index a1b0aec4cf81c..91eb225580910 100644 --- a/llvm/test/CodeGen/Hexagon/swp-carried-dep3.mir +++ b/llvm/test/CodeGen/Hexagon/swp-carried-dep3.mir @@ -7,6 +7,13 @@ # requires to use a single CHECK-NOT to match such a Rec NodeSet. Fortunately # the atom '.' does not match a newline but anything else on a line. +# CHECK: Overlap check: +# CHECK-NEXT: BaseMI: %13:intregs = S2_storerh_pi %12:intregs(tied-def 0), 2, %20:intregs :: (store (s16)) +# CHECK-NEXT: Base + 0 + I * 2, Len: 2 +# CHECK-NEXT: OtherMI: %19:intregs, %15:intregs = L2_loadrh_pi %14:intregs(tied-def 1), 2 :: (load (s16)) +# CHECK-NEXT: Base + 0 + I * 2, Len: 2 +# CHECK-NEXT: Result: No overlap + # CHECK-NOT: Rec NodeSet{{.+[[:space:]]}} SU(5){{.+[[:space:]]}} SU(7) ... 
diff --git a/llvm/test/CodeGen/Hexagon/swp-carried-dep4.mir b/llvm/test/CodeGen/Hexagon/swp-carried-dep4.mir new file mode 100644 index 0000000000000..ff04886be561a --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/swp-carried-dep4.mir @@ -0,0 +1,37 @@ +# RUN: llc -mtriple=hexagon -run-pass pipeliner -debug-only=pipeliner %s -o /dev/null 2>&1 | FileCheck %s +# REQUIRES: asserts + +# Loop carried dependence is assumed in cases where increment value cannot be recognized +# (Not supported for multiple increment instruction) + +# CHECK: Rec NodeSet +# CHECK: Rec NodeSet +# CHECK-NEXT: SU(1) +# CHECK-NEXT: SU(2) + +--- +name: test +tracksRegLiveness: true + +body: | + bb.0: + successors: %bb.1 + + %10:intregs = IMPLICIT_DEF + %11:intregs = IMPLICIT_DEF + J2_loop0i %bb.1, 6, implicit-def $lc0, implicit-def $sa0, implicit-def $usr + + bb.1 (machine-block-address-taken): + successors: %bb.1, %bb.2 + + %0:intregs = PHI %11, %bb.0, %6, %bb.1 + %4:intregs = L2_loadri_io %0, 0 :: (load (s32)) + S2_storeri_io %0, 0, %10 :: (store (s32)) + %7:intregs = A2_addi %0, -8 + %6:intregs = A2_addi %7, 4 + ENDLOOP0 %bb.1, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0 + J2_jump %bb.2, implicit-def dead $pc + + bb.2: + +... 
diff --git a/llvm/test/CodeGen/Hexagon/swp-carried-dep5.mir b/llvm/test/CodeGen/Hexagon/swp-carried-dep5.mir new file mode 100644 index 0000000000000..bf3cd95a84a72 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/swp-carried-dep5.mir @@ -0,0 +1,146 @@ +# RUN: llc -mtriple=hexagon -run-pass pipeliner -debug-only=pipeliner %s -o /dev/null 2>&1 | FileCheck %s +# REQUIRES: asserts + +# Test boundary cases for loop carried dependence analysis + +#CHECK: Overlap check: +#CHECK: BaseMI: S2_storeri_io %10:intregs, 0, %0:intregs :: (store (s32)) +#CHECK: Base + 0 + I * 3, Len: 4 +#CHECK: OtherMI: dead %30:intregs = L2_loadri_io %10:intregs, 0 :: (load (s32)) +#CHECK: Base + 0 + I * 3, Len: 4 +#CHECK: Result: Overlap +#CHECK: Overlap check: +#CHECK: BaseMI: S2_storeri_io %12:intregs, 0, %1:intregs :: (store (s32)) +#CHECK: Base + 0 + I * 4, Len: 4 +#CHECK: OtherMI: dead %31:intregs = L2_loadri_io %12:intregs, 0 :: (load (s32)) +#CHECK: Base + 0 + I * 4, Len: 4 +#CHECK: Result: No overlap +#CHECK: Overlap check: +#CHECK: BaseMI: S2_storeri_io %14:intregs, 0, %2:intregs :: (store (s64)) +#CHECK: Base + 0 + I * 4, Len: 8 +#CHECK: OtherMI: dead %32:intregs = L2_loadri_io %14:intregs, 0 :: (load (s32)) +#CHECK: Base + 0 + I * 4, Len: 4 +#CHECK: Result: Overlap +#CHECK: Overlap check: +#CHECK: BaseMI: S2_storeri_io %16:intregs, 0, %3:intregs :: (store (s32)) +#CHECK: Base + 0 + I * 4, Len: 4 +#CHECK: OtherMI: dead %33:intregs = L2_loadri_io %16:intregs, -1 :: (load (s32)) +#CHECK: Base + -1 + I * 4, Len: 4 +#CHECK: Result: Overlap +#CHECK: Overlap check: +#CHECK: BaseMI: S2_storeri_io %18:intregs, 1, %4:intregs :: (store (s32)) +#CHECK: Base + 1 + I * 4, Len: 4 +#CHECK: OtherMI: dead %34:intregs = L2_loadri_io %18:intregs, 0 :: (load (s32)) +#CHECK: Base + 0 + I * 4, Len: 4 +#CHECK: Result: Overlap +#CHECK: Overlap check: +#CHECK: BaseMI: S2_storeri_io %20:intregs, 0, %5:intregs :: (store (s32)) +#CHECK: Base + 0 + I * -3, Len: 4 +#CHECK: OtherMI: dead %35:intregs = L2_loadri_io 
%20:intregs, 0 :: (load (s32)) +#CHECK: Base + 0 + I * -3, Len: 4 +#CHECK: Result: Overlap +#CHECK: Overlap check: +#CHECK: BaseMI: S2_storeri_io %22:intregs, 0, %6:intregs :: (store (s32)) +#CHECK: Base + 0 + I * -4, Len: 4 +#CHECK: OtherMI: dead %36:intregs = L2_loadri_io %22:intregs, 0 :: (load (s32)) +#CHECK: Base + 0 + I * -4, Len: 4 +#CHECK: Result: No overlap +#CHECK: Overlap check: +#CHECK: BaseMI: S2_storeri_io %24:intregs, 0, %7:intregs :: (store (s32)) +#CHECK: Base + 0 + I * -4, Len: 4 +#CHECK: OtherMI: dead %37:intregs = L2_loadri_io %24:intregs, 0 :: (load (s64)) +#CHECK: Base + 0 + I * -4, Len: 8 +#CHECK: Result: Overlap +#CHECK: Overlap check: +#CHECK: BaseMI: S2_storeri_io %26:intregs, 0, %8:intregs :: (store (s32)) +#CHECK: Base + 0 + I * -4, Len: 4 +#CHECK: OtherMI: dead %38:intregs = L2_loadri_io %26:intregs, 1 :: (load (s32)) +#CHECK: Base + 1 + I * -4, Len: 4 +#CHECK: Result: Overlap +#CHECK: Overlap check: +#CHECK: BaseMI: S2_storeri_io %28:intregs, -1, %9:intregs :: (store (s32)) +#CHECK: Base + -1 + I * -4, Len: 4 +#CHECK: OtherMI: dead %39:intregs = L2_loadri_io %28:intregs, 0 :: (load (s32)) +#CHECK: Base + 0 + I * -4, Len: 4 +#CHECK: Result: Overlap + +--- +name: test +tracksRegLiveness: true + +body: | + bb.0: + successors: %bb.1 + + %100:intregs = IMPLICIT_DEF + %200:intregs = IMPLICIT_DEF + %300:intregs = IMPLICIT_DEF + %400:intregs = IMPLICIT_DEF + %500:intregs = IMPLICIT_DEF + %600:intregs = IMPLICIT_DEF + %700:intregs = IMPLICIT_DEF + %800:intregs = IMPLICIT_DEF + %900:intregs = IMPLICIT_DEF + %1000:intregs = IMPLICIT_DEF + J2_loop0i %bb.1, 6, implicit-def $lc0, implicit-def $sa0, implicit-def $usr + + bb.1 (machine-block-address-taken): + successors: %bb.1, %bb.2 + + %0:intregs = PHI %100, %bb.0, %1, %bb.1 + %2:intregs = PHI %200, %bb.0, %3, %bb.1 + %4:intregs = PHI %300, %bb.0, %5, %bb.1 + %6:intregs = PHI %400, %bb.0, %7, %bb.1 + %8:intregs = PHI %500, %bb.0, %9, %bb.1 + %10:intregs = PHI %600, %bb.0, %11, %bb.1 + %12:intregs = 
PHI %700, %bb.0, %13, %bb.1 + %14:intregs = PHI %800, %bb.0, %15, %bb.1 + %16:intregs = PHI %900, %bb.0, %17, %bb.1 + %18:intregs = PHI %1000, %bb.0, %19, %bb.1 + + %1:intregs = A2_addi %0, 3 + %101:intregs = L2_loadri_io %0, 0 :: (load (s32)) + S2_storeri_io %0, 0, %100 :: (store (s32)) + + %3:intregs = A2_addi %2, 4 + %201:intregs = L2_loadri_io %2, 0 :: (load (s32)) + S2_storeri_io %2, 0, %200 :: (store (s32)) + + %5:intregs = A2_addi %4, 4 + %301:intregs = L2_loadri_io %4, 0 :: (load (s32)) + S2_storeri_io %4, 0, %300 :: (store (s64)) + + %7:intregs = A2_addi %6, 4 + %401:intregs = L2_loadri_io %6, -1 :: (load (s32)) + S2_storeri_io %6, 0, %400 :: (store (s32)) + + %9:intregs = A2_addi %8, 4 + %501:intregs = L2_loadri_io %8, 0 :: (load (s32)) + S2_storeri_io %8, 1, %500 :: (store (s32)) + + %11:intregs = A2_addi %10, -3 + %601:intregs = L2_loadri_io %10, 0 :: (load (s32)) + S2_storeri_io %10, 0, %600 :: (store (s32)) + + %13:intregs = A2_addi %12, -4 + %701:intregs = L2_loadri_io %12, 0 :: (load (s32)) + S2_storeri_io %12, 0, %700 :: (store (s32)) + + %15:intregs = A2_addi %14, -4 + %801:intregs = L2_loadri_io %14, 0 :: (load (s64)) + S2_storeri_io %14, 0, %800 :: (store (s32)) + + %17:intregs = A2_addi %16, -4 + %901:intregs = L2_loadri_io %16, 1 :: (load (s32)) + S2_storeri_io %16, 0, %900 :: (store (s32)) + + %19:intregs = A2_addi %18, -4 + %1001:intregs = L2_loadri_io %18, 0 :: (load (s32)) + S2_storeri_io %18, -1, %1000 :: (store (s32)) + + ENDLOOP0 %bb.1, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0 + J2_jump %bb.2, implicit-def dead $pc + + bb.2: + +... 
diff --git a/llvm/test/CodeGen/Hexagon/swp-epilog-phi8.ll b/llvm/test/CodeGen/Hexagon/swp-epilog-phi8.ll index 76434e79ad994..e9384e8710211 100644 --- a/llvm/test/CodeGen/Hexagon/swp-epilog-phi8.ll +++ b/llvm/test/CodeGen/Hexagon/swp-epilog-phi8.ll @@ -31,7 +31,7 @@ b3: ; preds = %b3, %b2 %v6 = load i32, ptr %v5, align 4 %v7 = tail call i32 @llvm.hexagon.A2.subsat(i32 %v2, i32 %v6) store i32 %v7, ptr %v3, align 4 - %v8 = add i32 %v0, -1 + %v8 = add i32 %v0, 1 %v9 = icmp sgt i32 %v8, 1 br i1 %v9, label %b3, label %b4