diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 2a44ec8032236..1faf279ae2012 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -901,7 +901,10 @@ LLVM_ABI bool sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
 LLVM_ABI bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
                                   ScalarEvolution &SE, bool CheckType = true);
 
-/// Calculate Start and End points of memory access.
+/// Calculate Start and End points of memory access using the exact backedge
+/// taken count \p BTC if computable, or the maximum backedge taken count
+/// \p MaxBTC otherwise.
+///
 /// Let's assume A is the first access and B is a memory access on N-th loop
 /// iteration. Then B is calculated as:
 ///   B = A + Step*N .
@@ -915,8 +918,8 @@ LLVM_ABI bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
 /// There is no conflict when the intervals are disjoint:
 /// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
 LLVM_ABI std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess(
-    const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *MaxBECount,
-    ScalarEvolution *SE,
+    const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
+    const SCEV *MaxBTC, ScalarEvolution *SE,
     DenseMap<std::pair<const SCEV *, const SCEV *>,
              std::pair<const SCEV *, const SCEV *>> *PointerBounds);
 
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index 71a75b496455a..880249588f0b2 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -319,17 +319,22 @@ bool llvm::isDereferenceableAndAlignedInLoop(
   const SCEV *MaxBECount =
       Predicates ? SE.getPredicatedConstantMaxBackedgeTakenCount(L, *Predicates)
                  : SE.getConstantMaxBackedgeTakenCount(L);
+  const SCEV *BECount = Predicates
+                            ? SE.getPredicatedBackedgeTakenCount(L, *Predicates)
+                            : SE.getBackedgeTakenCount(L);
   if (isa<SCEVCouldNotCompute>(MaxBECount))
     return false;
 
   const auto &[AccessStart, AccessEnd] = getStartAndEndForAccess(
-      L, PtrScev, LI->getType(), MaxBECount, &SE, nullptr);
+      L, PtrScev, LI->getType(), BECount, MaxBECount, &SE, nullptr);
   if (isa<SCEVCouldNotCompute>(AccessStart) ||
       isa<SCEVCouldNotCompute>(AccessEnd))
     return false;
 
   // Try to get the access size.
   const SCEV *PtrDiff = SE.getMinusSCEV(AccessEnd, AccessStart);
+  if (isa<SCEVCouldNotCompute>(PtrDiff))
+    return false;
   APInt MaxPtrDiff = SE.getUnsignedRangeMax(PtrDiff);
 
   Value *Base = nullptr;
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 94b9fe9581264..212b3bf196c9e 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -188,9 +188,90 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
   Members.push_back(Index);
 }
 
+/// Returns \p A + \p B if it is guaranteed not to unsigned wrap. Otherwise
+/// returns nullptr. \p A and \p B must have the same type.
+static const SCEV *addSCEVOverflow(const SCEV *A, const SCEV *B,
+                                   ScalarEvolution &SE) {
+  if (!SE.willNotOverflow(Instruction::Add, false, A, B))
+    return nullptr;
+  return SE.getAddExpr(A, B);
+}
+
+/// Returns \p A * \p B if it is guaranteed not to unsigned wrap. Otherwise
+/// returns nullptr. \p A and \p B must have the same type.
+static const SCEV *mulSCEVOverflow(const SCEV *A, const SCEV *B,
+                                   ScalarEvolution &SE) {
+  if (!SE.willNotOverflow(Instruction::Mul, false, A, B))
+    return nullptr;
+  return SE.getMulExpr(A, B);
+}
+
+/// Return true if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR
+/// at \p MaxBTC is guaranteed inbounds of the accessed object.
+static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
+                                                 const SCEV *MaxBTC,
+                                                 const SCEV *EltSize,
+                                                 ScalarEvolution &SE,
+                                                 const DataLayout &DL) {
+  auto *PointerBase = SE.getPointerBase(AR->getStart());
+  auto *StartPtr = dyn_cast<SCEVUnknown>(PointerBase);
+  if (!StartPtr)
+    return false;
+  bool CheckForNonNull, CheckForFreed;
+  uint64_t DerefBytes = StartPtr->getValue()->getPointerDereferenceableBytes(
+      DL, CheckForNonNull, CheckForFreed);
+
+  if (CheckForNonNull || CheckForFreed)
+    return false;
+
+  const SCEV *Step = AR->getStepRecurrence(SE);
+  bool IsKnownNonNegative = SE.isKnownNonNegative(Step);
+  if (!IsKnownNonNegative && !SE.isKnownNegative(Step))
+    return false;
+
+  Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType());
+  Step = SE.getNoopOrSignExtend(Step, WiderTy);
+  MaxBTC = SE.getNoopOrZeroExtend(MaxBTC, WiderTy);
+
+  // For the computations below, make sure they don't unsigned wrap.
+  if (!SE.isKnownPredicate(CmpInst::ICMP_UGE, AR->getStart(), StartPtr))
+    return false;
+  const SCEV *StartOffset = SE.getNoopOrZeroExtend(
+      SE.getMinusSCEV(AR->getStart(), StartPtr), WiderTy);
+
+  const SCEV *OffsetAtLastIter =
+      mulSCEVOverflow(MaxBTC, SE.getAbsExpr(Step, false), SE);
+  if (!OffsetAtLastIter)
+    return false;
+
+  const SCEV *OffsetEndBytes = addSCEVOverflow(
+      OffsetAtLastIter, SE.getNoopOrZeroExtend(EltSize, WiderTy), SE);
+  if (!OffsetEndBytes)
+    return false;
+
+  if (IsKnownNonNegative) {
+    // For positive steps, check if
+    //   (AR->getStart() - StartPtr) + (MaxBTC * Step) + EltSize <= DerefBytes,
+    // while making sure none of the computations unsigned wrap themselves.
+    const SCEV *EndBytes = addSCEVOverflow(StartOffset, OffsetEndBytes, SE);
+    if (!EndBytes)
+      return false;
+    return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes,
+                               SE.getConstant(WiderTy, DerefBytes));
+  }
+
+  // For negative steps check if
+  //  * StartOffset >= (MaxBTC * Step + EltSize)
+  //  * StartOffset <= DerefBytes.
+  assert(SE.isKnownNegative(Step) && "must be known negative");
+  return SE.isKnownPredicate(CmpInst::ICMP_SGE, StartOffset, OffsetEndBytes) &&
+         SE.isKnownPredicate(CmpInst::ICMP_ULE, StartOffset,
+                             SE.getConstant(WiderTy, DerefBytes));
+}
+
 std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
-    const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *MaxBECount,
-    ScalarEvolution *SE,
+    const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
+    const SCEV *MaxBTC, ScalarEvolution *SE,
     DenseMap<std::pair<const SCEV *, const SCEV *>,
              std::pair<const SCEV *, const SCEV *>> *PointerBounds) {
   std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
@@ -206,11 +287,37 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
   const SCEV *ScStart;
   const SCEV *ScEnd;
 
+  auto &DL = Lp->getHeader()->getDataLayout();
+  Type *IdxTy = DL.getIndexType(PtrExpr->getType());
+  const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
   if (SE->isLoopInvariant(PtrExpr, Lp)) {
     ScStart = ScEnd = PtrExpr;
   } else if (auto *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr)) {
     ScStart = AR->getStart();
-    ScEnd = AR->evaluateAtIteration(MaxBECount, *SE);
+    if (!isa<SCEVCouldNotCompute>(BTC))
+      // Evaluating AR at an exact BTC is safe: LAA separately checks that
+      // accesses cannot wrap in the loop. If evaluating AR at BTC wraps, then
+      // the loop either triggers UB when executing a memory access with a
+      // poison pointer or the wrapping/poisoned pointer is not used.
+      ScEnd = AR->evaluateAtIteration(BTC, *SE);
+    else {
+      // Evaluating AR at MaxBTC may wrap and create an expression that is
+      // less than the start of the AddRec due to wrapping (for example
+      // consider MaxBTC = -2).
+      // If that's the case, set ScEnd to -(EltSize + 1): ScEnd will get
+      // incremented by EltSize before returning, so this effectively sets
+      // ScEnd to the maximum unsigned value for the type. Note that LAA
+      // separately checks that accesses cannot wrap, so unsigned max
+      // represents an upper bound.
+      if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE,
+                                               DL)) {
+        ScEnd = AR->evaluateAtIteration(MaxBTC, *SE);
+      } else {
+        ScEnd = SE->getAddExpr(
+            SE->getNegativeSCEV(EltSizeSCEV),
+            SE->getSCEV(ConstantExpr::getIntToPtr(
+                ConstantInt::get(EltSizeSCEV->getType(), -1), AR->getType())));
+      }
+    }
 
     const SCEV *Step = AR->getStepRecurrence(*SE);
 
     // For expressions with negative step, the upper bound is ScStart and the
@@ -232,9 +339,6 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
   assert(SE->isLoopInvariant(ScEnd, Lp) && "ScEnd needs to be invariant");
 
   // Add the size of the pointed element to ScEnd.
-  auto &DL = Lp->getHeader()->getDataLayout();
-  Type *IdxTy = DL.getIndexType(PtrExpr->getType());
-  const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
   ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
 
   std::pair<const SCEV *, const SCEV *> Res = {ScStart, ScEnd};
@@ -250,9 +354,11 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
                                     unsigned DepSetId, unsigned ASId,
                                     PredicatedScalarEvolution &PSE,
                                     bool NeedsFreeze) {
-  const SCEV *MaxBECount = PSE.getSymbolicMaxBackedgeTakenCount();
-  const auto &[ScStart, ScEnd] = getStartAndEndForAccess(
-      Lp, PtrExpr, AccessTy, MaxBECount, PSE.getSE(), &DC.getPointerBounds());
+  const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
+  const SCEV *BTC = PSE.getBackedgeTakenCount();
+  const auto &[ScStart, ScEnd] =
+      getStartAndEndForAccess(Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC,
+                              PSE.getSE(), &DC.getPointerBounds());
   assert(!isa<SCEVCouldNotCompute>(ScStart) &&
          !isa<SCEVCouldNotCompute>(ScEnd) &&
          "must be able to compute both start and end expressions");
@@ -1907,11 +2013,14 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
   // required for correctness.
   if (SE.isLoopInvariant(Src, InnermostLoop) ||
       SE.isLoopInvariant(Sink, InnermostLoop)) {
-    const SCEV *MaxBECount = PSE.getSymbolicMaxBackedgeTakenCount();
-    const auto &[SrcStart_, SrcEnd_] = getStartAndEndForAccess(
-        InnermostLoop, Src, ATy, MaxBECount, PSE.getSE(), &PointerBounds);
-    const auto &[SinkStart_, SinkEnd_] = getStartAndEndForAccess(
-        InnermostLoop, Sink, BTy, MaxBECount, PSE.getSE(), &PointerBounds);
+    const SCEV *BTC = PSE.getBackedgeTakenCount();
+    const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
+    const auto &[SrcStart_, SrcEnd_] =
+        getStartAndEndForAccess(InnermostLoop, Src, ATy, BTC, SymbolicMaxBTC,
+                                PSE.getSE(), &PointerBounds);
+    const auto &[SinkStart_, SinkEnd_] =
+        getStartAndEndForAccess(InnermostLoop, Sink, BTy, BTC, SymbolicMaxBTC,
+                                PSE.getSE(), &PointerBounds);
     if (!isa<SCEVCouldNotCompute>(SrcStart_) &&
         !isa<SCEVCouldNotCompute>(SrcEnd_) &&
         !isa<SCEVCouldNotCompute>(SinkStart_) &&
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll b/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll
index 41db3802daffa..d4cd61bfde2f4 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll
@@ -72,10 +72,10 @@ define void @all_exits_dominate_latch_countable_exits_at_most_501_iterations_kno
 ; CHECK-NEXT:       %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
 ; CHECK-NEXT:     Grouped accesses:
 ; CHECK-NEXT:       Group GRP0:
-; CHECK-NEXT:         (Low: %B High: (2004 + %B))
+; CHECK-NEXT:         (Low: %B High: inttoptr (i64 -1 to ptr))
 ; CHECK-NEXT:           Member: {%B,+,4}<%loop.header>
 ; CHECK-NEXT:       Group GRP1:
-; CHECK-NEXT:         (Low: %A High: (2004 + %A))
+; CHECK-NEXT:         (Low: %A High: inttoptr (i64 -1 to ptr))
 ; CHECK-NEXT:           Member: {%A,+,4}<%loop.header>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:     Non vectorizable stores to invariant address were not found in loop.
@@ -131,10 +131,10 @@ define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_not
 ; CHECK-NEXT:       %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
 ; CHECK-NEXT:     Grouped accesses:
 ; CHECK-NEXT:       Group GRP0:
-; CHECK-NEXT:         (Low: %B High: (2000 + %B))
+; CHECK-NEXT:         (Low: %B High: inttoptr (i64 -1 to ptr))
 ; CHECK-NEXT:           Member: {%B,+,4}<%loop.header>
 ; CHECK-NEXT:       Group GRP1:
-; CHECK-NEXT:         (Low: %A High: (2000 + %A))
+; CHECK-NEXT:         (Low: %A High: inttoptr (i64 -1 to ptr))
 ; CHECK-NEXT:           Member: {%A,+,4}<%loop.header>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:     Non vectorizable stores to invariant address were not found in loop.
@@ -247,10 +247,10 @@ define i32 @all_exits_dominate_latch_countable_exits_at_most_1001_iterations_kno
 ; CHECK-NEXT:       %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
 ; CHECK-NEXT:     Grouped accesses:
 ; CHECK-NEXT:       Group GRP0:
-; CHECK-NEXT:         (Low: %B High: (4004 + %B))
+; CHECK-NEXT:         (Low: %B High: inttoptr (i64 -1 to ptr))
 ; CHECK-NEXT:           Member: {%B,+,4}<%loop.header>
 ; CHECK-NEXT:       Group GRP1:
-; CHECK-NEXT:         (Low: %A High: (4004 + %A))
+; CHECK-NEXT:         (Low: %A High: inttoptr (i64 -1 to ptr))
 ; CHECK-NEXT:           Member: {%A,+,4}<%loop.header>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:     Non vectorizable stores to invariant address were not found in loop.
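// The High bounds in the hunks above all become inttoptr (i64 -1 to ptr)
// because these early-exit loops have no computable exact BTC and the
// dereferenceability check cannot prove the last access stays inbounds, so
// getStartAndEndForAccess clamps the end of the range. Below is a standalone
// sketch of that clamping arithmetic, using plain 64-bit integers in place of
// SCEV expressions (the values are illustrative, not taken from the patch):
//
//   #include <cstdint>
//   #include <cstdio>
//
//   int main() {
//     uint64_t EltSize = 4; // store size of the accessed type (i32 here)
//     // ScEnd = (-EltSize) + inttoptr(-1), i.e. -(EltSize + 1), which is
//     // EltSize below the all-ones pointer value.
//     uint64_t ScEnd = -EltSize + uint64_t(-1);
//     // getStartAndEndForAccess adds EltSize back before returning, so the
//     // reported High bound is the unsigned maximum, printed by LAA as
//     // "inttoptr (i64 -1 to ptr)".
//     ScEnd += EltSize;
//     printf("High = 0x%016llx\n", (unsigned long long)ScEnd);
//     return 0;
//   }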
@@ -305,10 +305,10 @@ define i32 @all_exits_dominate_latch_countable_exits_at_most_1000_iterations_not
 ; CHECK-NEXT:       %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
 ; CHECK-NEXT:     Grouped accesses:
 ; CHECK-NEXT:       Group GRP0:
-; CHECK-NEXT:         (Low: %B High: (4000 + %B))
+; CHECK-NEXT:         (Low: %B High: inttoptr (i64 -1 to ptr))
 ; CHECK-NEXT:           Member: {%B,+,4}<%loop.header>
 ; CHECK-NEXT:       Group GRP1:
-; CHECK-NEXT:         (Low: %A High: (4000 + %A))
+; CHECK-NEXT:         (Low: %A High: inttoptr (i64 -1 to ptr))
 ; CHECK-NEXT:           Member: {%A,+,4}<%loop.header>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:     Non vectorizable stores to invariant address were not found in loop.
@@ -350,6 +350,7 @@ e.2:
   ret i32 2
 }
 
+
 define i32 @not_all_exits_dominate_latch(ptr %A, ptr %B) {
 ; CHECK-LABEL: 'not_all_exits_dominate_latch'
 ; CHECK-NEXT:   loop.header:
@@ -407,10 +408,10 @@ define i32 @b3_does_not_dominate_latch_known_deref(ptr dereferenceable(4000) %A,
 ; CHECK-NEXT:       %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
 ; CHECK-NEXT:     Grouped accesses:
 ; CHECK-NEXT:       Group GRP0:
-; CHECK-NEXT:         (Low: %B High: (4004 + %B))
+; CHECK-NEXT:         (Low: %B High: inttoptr (i64 -1 to ptr))
 ; CHECK-NEXT:           Member: {%B,+,4}<%loop.header>
 ; CHECK-NEXT:       Group GRP1:
-; CHECK-NEXT:         (Low: %A High: (4004 + %A))
+; CHECK-NEXT:         (Low: %A High: inttoptr (i64 -1 to ptr))
 ; CHECK-NEXT:           Member: {%A,+,4}<%loop.header>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:     Non vectorizable stores to invariant address were not found in loop.
@@ -462,10 +463,10 @@ define i32 @b3_does_not_dominate_latch_not_known_deref(ptr %A, ptr %B) {
 ; CHECK-NEXT:       %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
 ; CHECK-NEXT:     Grouped accesses:
 ; CHECK-NEXT:       Group GRP0:
-; CHECK-NEXT:         (Low: %B High: (4004 + %B))
+; CHECK-NEXT:         (Low: %B High: inttoptr (i64 -1 to ptr))
 ; CHECK-NEXT:           Member: {%B,+,4}<%loop.header>
 ; CHECK-NEXT:       Group GRP1:
-; CHECK-NEXT:         (Low: %A High: (4004 + %A))
+; CHECK-NEXT:         (Low: %A High: inttoptr (i64 -1 to ptr))
 ; CHECK-NEXT:           Member: {%A,+,4}<%loop.header>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:     Non vectorizable stores to invariant address were not found in loop.
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-backedge-taken-count-wrapping.ll b/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-backedge-taken-count-wrapping.ll
new file mode 100644
index 0000000000000..54d671c62dbf4
--- /dev/null
+++ b/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-backedge-taken-count-wrapping.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes='print<access-info>' -disable-output %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+
+; Note: The datalayout for the test specifies a 32-bit index type.
+
+; No UB: accessing the last valid byte, the pointer past the object
+; doesn't wrap (%p + 2147483647).
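+; The loop below increments %iv while %iv.next slt 2147483647, so the last
+; i8 access is at %p + 2147483646 and the one-past-the-end pointer is
+; %p + 2147483647, which still fits in the 32-bit index type.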
+define void @pointer_after_object_does_not_wrap(i32 %y, ptr %s, ptr %p) {
+; CHECK-LABEL: 'pointer_after_object_does_not_wrap'
+; CHECK-NEXT:   loop:
+; CHECK-NEXT:     Memory dependences are safe with run-time checks
+; CHECK-NEXT:     Dependences:
+; CHECK-NEXT:     Run-time memory checks:
+; CHECK-NEXT:     Check 0:
+; CHECK-NEXT:       Comparing group GRP0:
+; CHECK-NEXT:         %gep2.iv = getelementptr inbounds i8, ptr %p, i32 %iv
+; CHECK-NEXT:       Against group GRP1:
+; CHECK-NEXT:         %gep1.iv = getelementptr inbounds i8, ptr %s, i32 %iv
+; CHECK-NEXT:     Grouped accesses:
+; CHECK-NEXT:       Group GRP0:
+; CHECK-NEXT:         (Low: (%y + %p) High: (2147483647 + %p))
+; CHECK-NEXT:           Member: {(%y + %p),+,1}<%loop>
+; CHECK-NEXT:       Group GRP1:
+; CHECK-NEXT:         (Low: (%y + %s) High: (2147483647 + %s))
+; CHECK-NEXT:           Member: {(%y + %s),+,1}<%loop>
+; CHECK-EMPTY:
+; CHECK-NEXT:     Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:     SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:     Expressions re-written:
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ %y, %entry ], [ %iv.next, %loop ]
+  %gep1.iv = getelementptr inbounds i8, ptr %s, i32 %iv
+  %load = load i8, ptr %gep1.iv, align 4
+  %gep2.iv = getelementptr inbounds i8, ptr %p, i32 %iv
+  store i8 %load, ptr %gep2.iv, align 4
+  %iv.next = add nsw i32 %iv, 1
+  %c.2 = icmp slt i32 %iv.next, 2147483647
+  br i1 %c.2, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; UB: accessing %p + 2147483646 and %p + 2147483647.
+; The pointer past the object would wrap in the signed sense.
+define void @pointer_after_object_would_wrap(i32 %y, ptr %s, ptr %p) {
+; CHECK-LABEL: 'pointer_after_object_would_wrap'
+; CHECK-NEXT:   loop:
+; CHECK-NEXT:     Memory dependences are safe with run-time checks
+; CHECK-NEXT:     Dependences:
+; CHECK-NEXT:     Run-time memory checks:
+; CHECK-NEXT:     Check 0:
+; CHECK-NEXT:       Comparing group GRP0:
+; CHECK-NEXT:         %gep2.iv = getelementptr inbounds i8, ptr %p, i32 %iv
+; CHECK-NEXT:       Against group GRP1:
+; CHECK-NEXT:         %gep1.iv = getelementptr inbounds i8, ptr %s, i32 %iv
+; CHECK-NEXT:     Grouped accesses:
+; CHECK-NEXT:       Group GRP0:
+; CHECK-NEXT:         (Low: (%y + %p) High: (-2147483648 + %p))
+; CHECK-NEXT:           Member: {(%y + %p),+,1}<%loop>
+; CHECK-NEXT:       Group GRP1:
+; CHECK-NEXT:         (Low: (%y + %s) High: (-2147483648 + %s))
+; CHECK-NEXT:           Member: {(%y + %s),+,1}<%loop>
+; CHECK-EMPTY:
+; CHECK-NEXT:     Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:     SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:     Expressions re-written:
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ %y, %entry ], [ %iv.next, %loop ]
+  %gep1.iv = getelementptr inbounds i8, ptr %s, i32 %iv
+  %load = load i16, ptr %gep1.iv, align 4
+  %gep2.iv = getelementptr inbounds i8, ptr %p, i32 %iv
+  store i16 %load, ptr %gep2.iv, align 4
+  %iv.next = add nsw i32 %iv, 1
+  %c.2 = icmp slt i32 %iv.next, 2147483647
+  br i1 %c.2, label %loop, label %exit
+
+exit:
+  ret void
+}
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-symbolic-max-backedge-taken-count-may-wrap.ll b/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-symbolic-max-backedge-taken-count-may-wrap.ll
index 906bf5ce845f1..e319c89577e24 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-symbolic-max-backedge-taken-count-may-wrap.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-symbolic-max-backedge-taken-count-may-wrap.ll
@@ -3,7 +3,6 @@
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 
-; FIXME: Start == End for access group with AddRec.
define void @runtime_checks_with_symbolic_max_btc_neg_1(ptr %P, ptr %S, i32 %x, i32 %y) { ; CHECK-LABEL: 'runtime_checks_with_symbolic_max_btc_neg_1' ; CHECK-NEXT: loop: @@ -17,7 +16,7 @@ define void @runtime_checks_with_symbolic_max_btc_neg_1(ptr %P, ptr %S, i32 %x, ; CHECK-NEXT: ptr %S ; CHECK-NEXT: Grouped accesses: ; CHECK-NEXT: Group GRP0: -; CHECK-NEXT: (Low: ((4 * %y) + %P) High: ((4 * %y) + %P)) +; CHECK-NEXT: (Low: ((4 * %y) + %P) High: inttoptr (i32 -1 to ptr)) ; CHECK-NEXT: Member: {((4 * %y) + %P),+,4}<%loop> ; CHECK-NEXT: Group GRP1: ; CHECK-NEXT: (Low: %S High: (4 + %S)) @@ -44,7 +43,6 @@ exit: ret void } -; FIXME: Start > End for access group with AddRec. define void @runtime_check_with_symbolic_max_btc_neg_2(ptr %P, ptr %S, i32 %x, i32 %y) { ; CHECK-LABEL: 'runtime_check_with_symbolic_max_btc_neg_2' ; CHECK-NEXT: loop: @@ -58,7 +56,7 @@ define void @runtime_check_with_symbolic_max_btc_neg_2(ptr %P, ptr %S, i32 %x, i ; CHECK-NEXT: ptr %S ; CHECK-NEXT: Grouped accesses: ; CHECK-NEXT: Group GRP0: -; CHECK-NEXT: (Low: ((4 * %y) + %P) High: (-4 + (4 * %y) + %P)) +; CHECK-NEXT: (Low: ((4 * %y) + %P) High: inttoptr (i32 -1 to ptr)) ; CHECK-NEXT: Member: {((4 * %y) + %P),+,4}<%loop> ; CHECK-NEXT: Group GRP1: ; CHECK-NEXT: (Low: %S High: (4 + %S)) @@ -137,8 +135,8 @@ exit: ret i32 %res } -; FIXME: evaluating at symbolic max BTC wraps around to a positive -; offset: (2 + (2 * %y) + %P) +; Evaluating at symbolic max BTC wraps around to a positive +; offset: (2 + (2 * %y) + %P). define void @runtime_check_with_symbolic_max_wraps_to_positive_offset(ptr %P, ptr %S, i32 %x, i32 %y) { ; CHECK-LABEL: 'runtime_check_with_symbolic_max_wraps_to_positive_offset' ; CHECK-NEXT: loop: @@ -152,7 +150,7 @@ define void @runtime_check_with_symbolic_max_wraps_to_positive_offset(ptr %P, pt ; CHECK-NEXT: ptr %S ; CHECK-NEXT: Grouped accesses: ; CHECK-NEXT: Group GRP0: -; CHECK-NEXT: (Low: ((2 * %y) + %P) High: (2 + (2 * %y) + %P)) +; CHECK-NEXT: (Low: ((2 * %y) + %P) High: inttoptr (i32 -1 to ptr)) ; CHECK-NEXT: Member: {((2 * %y) + %P),+,2}<%loop> ; CHECK-NEXT: Group GRP1: ; CHECK-NEXT: (Low: %S High: (4 + %S)) diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll index ee65dc8cdcb1d..3e88672f29242 100644 --- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll +++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll @@ -16,14 +16,32 @@ define void @deref_assumption_in_preheader_non_constant_trip_count_access_i8(ptr ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i8> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i8>, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP15:%.*]] = 
xor <2 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0 +; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK: [[PRED_LOAD_IF]]: +; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[TMP0]], 0 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP19]] +; CHECK-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP16]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i8> poison, i8 [[TMP17]], i32 0 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] +; CHECK: [[PRED_LOAD_CONTINUE]]: +; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i8> [ poison, %[[VECTOR_BODY]] ], [ [[TMP18]], %[[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1 +; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_IF1]]: +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP12]], align 1 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i8> [[TMP9]], i8 [[TMP13]], i32 1 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_CONTINUE2]]: +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i8> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i8> [[WIDE_LOAD]], <2 x i8> [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 0 @@ -99,14 +117,32 @@ define void @deref_assumption_in_preheader_non_constant_trip_count_access_i32(pt ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0 +; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK: [[PRED_LOAD_IF]]: +; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[TMP0]], 0 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP19]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> poison, i32 [[TMP17]], i32 0 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] +; CHECK: [[PRED_LOAD_CONTINUE]]: +; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP18]], %[[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1 +; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_IF1]]: +; CHECK-NEXT: 
[[TMP11:%.*]] = add i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 1 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_CONTINUE2]]: +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 @@ -183,14 +219,32 @@ define void @deref_assumption_in_preheader_too_small_non_constant_trip_count_acc ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0 +; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK: [[PRED_LOAD_IF]]: +; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[TMP0]], 0 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP19]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> poison, i32 [[TMP17]], i32 0 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] +; CHECK: [[PRED_LOAD_CONTINUE]]: +; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP18]], %[[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1 +; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_IF1]]: +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 1 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_CONTINUE2]]: +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 @@ -265,14 +319,32 @@ define void @deref_assumption_in_preheader_too_small2_non_constant_trip_count_ac ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], 
[[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0 +; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK: [[PRED_LOAD_IF]]: +; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[TMP0]], 0 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP19]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> poison, i32 [[TMP17]], i32 0 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] +; CHECK: [[PRED_LOAD_CONTINUE]]: +; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP18]], %[[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1 +; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_IF1]]: +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 1 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_CONTINUE2]]: +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
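// To make the new inbounds reasoning concrete, here is a self-contained
// sketch of the non-negative-step check in evaluatePtrAddRecAtMaxBTCWillNotWrap,
// with plain unsigned integers standing in for SCEV expressions. The helper
// names and the values in main() are illustrative only, not part of the patch.

#include <cstdint>
#include <cstdio>
#include <optional>

// Mirrors addSCEVOverflow: reject the result on unsigned wrap.
static std::optional<uint64_t> addNoWrap(uint64_t A, uint64_t B) {
  uint64_t R = A + B;
  return R < A ? std::nullopt : std::optional<uint64_t>(R);
}

// Mirrors mulSCEVOverflow: reject the result on unsigned wrap.
static std::optional<uint64_t> mulNoWrap(uint64_t A, uint64_t B) {
  if (A != 0 && B > UINT64_MAX / A)
    return std::nullopt;
  return A * B;
}

// Checks StartOffset + MaxBTC * Step + EltSize <= DerefBytes, where every
// intermediate result must itself be wrap-free, matching the positive-step
// branch of the patch.
static bool accessStaysInbounds(uint64_t StartOffset, uint64_t MaxBTC,
                                uint64_t Step, uint64_t EltSize,
                                uint64_t DerefBytes) {
  auto OffsetAtLastIter = mulNoWrap(MaxBTC, Step);
  if (!OffsetAtLastIter)
    return false;
  auto OffsetEndBytes = addNoWrap(*OffsetAtLastIter, EltSize);
  if (!OffsetEndBytes)
    return false;
  auto EndBytes = addNoWrap(StartOffset, *OffsetEndBytes);
  return EndBytes && *EndBytes <= DerefBytes;
}

int main() {
  // A dereferenceable(4000) object with i32 accesses (EltSize = 4):
  printf("%d\n", accessStaysInbounds(0, 999, 4, 4, 4000));  // 1: ends at byte 4000
  printf("%d\n", accessStaysInbounds(0, 1000, 4, 4, 4000)); // 0: would end at 4004
  return 0;
}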