From e3db8373deaa19358999afa2978b054f8f71b5ad Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Wed, 2 Oct 2024 16:27:03 +0100
Subject: [PATCH 1/7] [VPlan] Add initial CFG simplification, removing
 BranchOnCond true.

---
 .../Transforms/Vectorize/LoopVectorize.cpp | 1 +
 .../Transforms/Vectorize/VPlanTransforms.cpp | 39 +++++++++++++++++++
 .../first-order-recurrence-fold-tail.ll | 7 ++--
 .../LoopVectorize/AArch64/pr73894.ll | 6 +--
 .../AArch64/sve-tail-folding-optsize.ll | 4 +-
 .../Transforms/LoopVectorize/RISCV/pr88802.ll | 4 +-
 .../LoopVectorize/RISCV/short-trip-count.ll | 8 ++--
 ...ectorize-force-tail-with-evl-interleave.ll | 4 +-
 .../vectorize-force-tail-with-evl-iv32.ll | 4 +-
 ...ze-force-tail-with-evl-masked-loadstore.ll | 4 +-
 ...e-force-tail-with-evl-ordered-reduction.ll | 6 +--
 .../RISCV/vectorize-vp-intrinsics.ll | 4 +-
 .../SystemZ/force-target-instruction-cost.ll | 4 +-
 .../LoopVectorize/SystemZ/pr47665.ll | 4 +-
 .../X86/divs-with-tail-folding.ll | 6 +--
 .../X86/fp64_to_uint32-cost-model.ll | 2 +-
 .../X86/vect.omp.force.small-tc.ll | 4 +-
 .../X86/vectorize-force-tail-with-evl.ll | 4 +-
 .../X86/vectorize-interleaved-accesses-gap.ll | 4 +-
 llvm/test/Transforms/LoopVectorize/as_cast.ll | 2 +-
 .../LoopVectorize/dead_instructions.ll | 8 ++--
 .../dont-fold-tail-for-divisible-TC.ll | 4 +-
 .../LoopVectorize/memdep-fold-tail.ll | 4 +-
 .../pr45679-fold-tail-by-masking.ll | 8 ++--
 .../pr46525-expander-insertpoint.ll | 4 +-
 .../pr51614-fold-tail-by-masking.ll | 6 +--
 .../LoopVectorize/select-reduction.ll | 12 +++---
 ...e-reduction-results-in-tail-folded-loop.ll | 6 +--
 .../strict-fadd-interleave-only.ll | 12 +++---
 .../tail-folding-alloca-in-loop.ll | 4 +-
 .../tail-folding-counting-down.ll | 2 +-
 .../LoopVectorize/tail-folding-switch.ll | 4 +-
 32 files changed, 117 insertions(+), 78 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 02bacde3f60a7..782e15f1d879f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2677,6 +2677,7 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
       SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT,
                  LI, nullptr, Twine(Prefix) + "scalar.ph");
   replaceVPBBWithIRVPBB(Plan.getScalarPreheader(), LoopScalarPreHeader);
+
 }

 /// Return the expanded step for \p ID using \p ExpandedSCEVs to look up SCEV
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index f23795f87d467..7f9c04bc931dd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1467,6 +1467,44 @@ void VPlanTransforms::truncateToMinimalBitwidths(
          "some entries in MinBWs haven't been processed");
 }

+/// Remove BranchOnCond recipes with constant conditions together with removing
+/// dead edges to their successors. Remove blocks that become dead (no remaining
+/// predecessors).
+static void simplifyCFG(VPlan &Plan) {
+  using namespace llvm::VPlanPatternMatch;
+  // Start by collecting successor blocks known not to be taken, based on a
+  // branch-on-cond terminator in their predecessor.
+  SmallVector<VPBlockBase *> WorkList;
+  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock *>(
+           vp_depth_first_deep(Plan.getEntry()))) {
+    VPRecipeBase *Term = VPBB->getTerminator();
+    if (!Term || !match(Term, m_BranchOnCond(m_True())))
+      continue;
+    VPBasicBlock *DeadSucc = cast<VPBasicBlock>(VPBB->getSuccessors()[1]);
+    VPBlockUtils::disconnectBlocks(VPBB, DeadSucc);
+    Term->eraseFromParent();
+    WorkList.push_back(DeadSucc);
+  }
+
+  VPValue Tmp;
+  // Remove any block in the worklist if it doesn't have any predecessors left.
+  // Disconnect the block from all successors and queue them for removal.
+  while (!WorkList.empty()) {
+    VPBlockBase *VPBB = WorkList.pop_back_val();
+    if (VPBB->getNumPredecessors() != 0)
+      continue;
+    for (VPBlockBase *Succ : to_vector(VPBB->getSuccessors())) {
+      VPBlockUtils::disconnectBlocks(VPBB, Succ);
+      WorkList.push_back(Succ);
+    }
+    // Replace all uses of values defined in VPBB with a dummy VPValue to
+    // facilitate removal. All blocks with remaining uses of the dummy value
+    // must be removed by subsequent operations.
+    VPBB->dropAllReferences(&Tmp);
+    delete VPBB;
+  }
+}
+
 void VPlanTransforms::optimize(VPlan &Plan) {
   runPass(removeRedundantCanonicalIVs, Plan);
   runPass(removeRedundantInductionCasts, Plan);
@@ -1480,6 +1518,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
   runPass(createAndOptimizeReplicateRegions, Plan);
   runPass(mergeBlocksIntoPredecessors, Plan);
+  runPass(simplifyCFG,Plan);
   runPass(licm, Plan);
 }

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll
index 86a9af6fd5a3c..2cacbc4c8446c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll
@@ -71,14 +71,13 @@ define i32 @test_phi_iterator_invalidation(ptr %A, ptr noalias %B) {
 ; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP24]], i32 3
-; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-NEXT: br label [[EXIT:%.*]]
 ; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1004, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT: br label [[LOOP:%.*]]
 ; CHECK: loop:
 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ 0, [[SCALAR_PH]] ], [ [[FOR_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-NEXT: [[SEXT:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
 ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV_NEXT]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll
index a70eafb6078a0..0def9aabd4942 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll
@@ -54,10 +54,10 @@ define i32 @pr70988(ptr %src, i32 %n) {
 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop
[[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP17]], i32 [[TMP18]]) -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[INDUC:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDUC_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll index 0f9ac7bf3870b..1d6b3f5b4fd93 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll @@ -37,9 +37,9 @@ define void @trip1025_i64(ptr noalias nocapture noundef %dst, ptr noalias nocapt ; CHECK-NEXT: [[TMP15:%.*]] = extractelement [[TMP14]], i32 0 ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_06:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll index da859963a7021..749e1f62334bc 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll @@ -58,9 +58,9 @@ define void @test(ptr %p, i64 %a, i8 %b) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY1]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 4, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll index 3386a7d3972aa..e92975bb4534d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll @@ -23,9 +23,9 @@ define void @small_trip_count_min_vlen_128(ptr nocapture %a) nounwind vscale_ran ; CHECK-NEXT: call void @llvm.masked.store.nxv1i32.p0( [[TMP6]], ptr [[TMP8]], i32 4, [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; 
CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -80,9 +80,9 @@ define void @small_trip_count_min_vlen_32(ptr nocapture %a) nounwind vscale_rang ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP8]], ptr [[TMP10]], i32 4, [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll index d7accc1b565d5..4f2a904e32969 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll @@ -66,9 +66,9 @@ define void @interleave(ptr noalias %a, ptr noalias %b, i64 %N) { ; IF-EVL-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_COND_CLEANUP:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-iv32.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-iv32.ll index 8addd359855e0..22ebecb3bd9c7 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-iv32.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-iv32.ll @@ -44,9 +44,9 @@ define void @iv32(ptr noalias %a, ptr noalias %b, i32 %N) { ; IF-EVL-NEXT: [[TMP18:%.*]] = icmp eq i32 [[IV_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_COND_CLEANUP:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY1:%.*]] ] ; IF-EVL-NEXT: br label [[FOR_BODY1:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll 
b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll index 4452f3860274d..ae5dcafe78dab 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll @@ -58,9 +58,9 @@ define void @masked_loadstore(ptr noalias %a, ptr noalias %b, i64 %n) { ; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[EXIT:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[I_011:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-ordered-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-ordered-reduction.ll index f0fac609eee1c..e9ad02fffae09 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-ordered-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-ordered-reduction.ll @@ -46,10 +46,10 @@ define float @fadd(ptr noalias nocapture readonly %a, i64 %n) { ; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll index 9069c6bf7a1a0..aae52d337c508 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll @@ -49,9 +49,9 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; IF-EVL-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_COND_CLEANUP:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 
[[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll index 385f7621801b4..082e3266e7c8f 100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll @@ -37,9 +37,9 @@ define void @test_scalar_steps_target_instruction_cost(ptr %dst) { ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 30, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll index 2de0f7e4d4016..bc0ccfb45c057 100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll @@ -117,9 +117,9 @@ define void @test(ptr %p, i40 %a) { ; CHECK: pred.store.continue30: ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll index f53b7c2a759ba..2db58fe1caa7f 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll @@ -44,9 +44,9 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) { ; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -160,7 +160,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP30]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br i1 true, label 
%[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll index 03783d3a6c9fb..2e4af40651f7d 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll @@ -29,7 +29,7 @@ for.body: ; preds = %for.body.preheader, store i32 %conv, ptr %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %2 = trunc i64 %indvars.iv.next to i32 - %cmp = icmp ult i32 %2, %0 + %cmp = icmp ult i32 %2, 1000 br i1 %cmp, label %for.body, label %for.end.loopexit for.end.loopexit: ; preds = %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll index 90ba702ed232e..de6530868275d 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll @@ -150,9 +150,9 @@ define void @vectorized1(ptr noalias nocapture %A, ptr noalias nocapture readonl ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 24, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll index e37eae4c1f390..f7ab1b626c3e2 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll @@ -42,9 +42,9 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_COND_CLEANUP:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll index 844fe1873bc98..349001cc7ab41 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll +++ 
b/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll @@ -75,9 +75,9 @@ define void @test_pr59090(ptr %l_out, ptr noalias %b) #0 { ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10008 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP1:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10008, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/as_cast.ll b/llvm/test/Transforms/LoopVectorize/as_cast.ll index 58a8c3d078f02..67aacefebd555 100644 --- a/llvm/test/Transforms/LoopVectorize/as_cast.ll +++ b/llvm/test/Transforms/LoopVectorize/as_cast.ll @@ -29,7 +29,7 @@ loop: ; check that we branch to the exit block ; CHECK: middle.block: -; CHECK: br i1 true, label %exit, label %scalar.ph +; CHECK: br label %exit exit: ret void diff --git a/llvm/test/Transforms/LoopVectorize/dead_instructions.ll b/llvm/test/Transforms/LoopVectorize/dead_instructions.ll index e1fd07bf590c4..31fbe68d819d2 100644 --- a/llvm/test/Transforms/LoopVectorize/dead_instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/dead_instructions.ll @@ -95,11 +95,11 @@ define void @pr47390(ptr %a) { ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8 ; CHECK-NEXT: br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 8, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 7, %[[MIDDLE_BLOCK]] ], [ -1, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ 9, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ -1, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ 1, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll index 53686ee76cbbd..ef3f13dfd7d9b 100644 --- a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll +++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll @@ -202,9 +202,9 @@ define dso_local void @cannotProveAlignedTC(ptr noalias nocapture %A, i32 %p, i3 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[EXIT_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[RIV:%.*]] = phi i32 [ [[RIVPLUS1:%.*]], [[LOOP]] ], [ 
[[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll b/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll index d1ad7e3f4fc0d..c9066f22c5592 100644 --- a/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll +++ b/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll @@ -69,9 +69,9 @@ define void @maxvf3() { ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[J:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[J_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll index c301ef3c5319a..e6926934ef42d 100644 --- a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll +++ b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll @@ -57,9 +57,9 @@ define void @pr45679(ptr %A) optsize { ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[RIV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[RIVPLUS1:%.*]], [[LOOP]] ] @@ -256,9 +256,9 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll index 1e1b03b6cd8cf..f4f33b17ce48f 100644 --- a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll +++ b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll @@ -32,9 +32,9 @@ define void @test(i16 %x, i64 %y, ptr %ptr) { ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = 
phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll index 705152662be24..77794dcb9369d 100644 --- a/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll +++ b/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll @@ -61,10 +61,10 @@ define dso_local i16 @reverse_interleave_load_fold_mask() optsize { ; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP28:%.*]] = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> [[TMP26]]) -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ -1, [[MIDDLE_BLOCK]] ], [ 41, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ [[TMP28]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 41, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IVMINUS1:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/select-reduction.ll b/llvm/test/Transforms/LoopVectorize/select-reduction.ll index 836115f381382..545c595e116fa 100644 --- a/llvm/test/Transforms/LoopVectorize/select-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/select-reduction.ll @@ -37,10 +37,10 @@ define i32 @test(i64 %N, i32 %x) { ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP3]]) -; CHECK-NEXT: br i1 true, label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[EXIT_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[EXTRA_ITER]], [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[EXTRA_ITER]], [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[NEXT:%.*]] = phi i32 [ [[SEL:%.*]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] @@ -98,10 +98,10 @@ define i32 @pr66895_tail_fold_reduction_exit_inst_gets_simplified(i32 %n) { ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP1]]) -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ -4, [[MIDDLE_BLOCK]] ], [ 12, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 12, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; 
CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/store-reduction-results-in-tail-folded-loop.ll b/llvm/test/Transforms/LoopVectorize/store-reduction-results-in-tail-folded-loop.ll index 57bc7b8337249..07911e5ec0d75 100644 --- a/llvm/test/Transforms/LoopVectorize/store-reduction-results-in-tail-folded-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/store-reduction-results-in-tail-folded-loop.ll @@ -42,10 +42,10 @@ define void @pr75298_store_reduction_value_in_folded_loop(i64 %iv.start) optsize ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP4]]) ; CHECK-NEXT: store i32 [[TMP6]], ptr @a, align 4 -; CHECK-NEXT: br i1 true, label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[EXIT_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[IV_START]], [[PH]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ 0, [[PH]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IV_START]], [[PH]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[PH]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/strict-fadd-interleave-only.ll b/llvm/test/Transforms/LoopVectorize/strict-fadd-interleave-only.ll index 760bdbf227fff..61e60741f0376 100644 --- a/llvm/test/Transforms/LoopVectorize/strict-fadd-interleave-only.ll +++ b/llvm/test/Transforms/LoopVectorize/strict-fadd-interleave-only.ll @@ -23,10 +23,10 @@ define float @pr70988() { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT3]], 1022 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1022, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ] @@ -129,10 +129,10 @@ define float @pr72720reduction_using_active_lane_mask(ptr %src) { ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NARROW:%.*]], [[LOOP]] ] 
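
To make the first commit's approach easier to follow: simplifyCFG above first drops the dead edge of every BranchOnCond true terminator, then uses a worklist to delete blocks that transitively lose all of their predecessors. Below is a minimal standalone sketch of that worklist scheme, assuming a hypothetical Block type with Preds/Succs vectors in place of VPlan's VPBlockBase/VPBlockUtils; Block, disconnect, and removeDeadBlocks are illustrative names, not LLVM APIs.

#include <algorithm>
#include <vector>

struct Block {
  std::vector<Block *> Preds, Succs;
};

// Remove the CFG edge From->To, mirroring VPBlockUtils::disconnectBlocks.
static void disconnect(Block *From, Block *To) {
  From->Succs.erase(std::find(From->Succs.begin(), From->Succs.end(), To));
  To->Preds.erase(std::find(To->Preds.begin(), To->Preds.end(), From));
}

// Seeded with the known-dead successors of branch-on-cond true terminators,
// delete every heap-allocated block that loses all of its predecessors.
static void removeDeadBlocks(std::vector<Block *> WorkList) {
  while (!WorkList.empty()) {
    Block *B = WorkList.back();
    WorkList.pop_back();
    if (!B->Preds.empty())
      continue; // Still reachable over another edge; keep it.
    // Copy the successor list: disconnect() mutates B->Succs while iterating.
    std::vector<Block *> Succs = B->Succs;
    for (Block *Succ : Succs) {
      disconnect(B, Succ);
      WorkList.push_back(Succ); // Succ may have become dead as well.
    }
    delete B;
  }
}

Seeding the worklist corresponds to the patch's first loop: for each BranchOnCond true terminator, the second (not-taken) successor is disconnected and queued.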
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll
index 56fc8eac35bad..31bf18a4cbd69 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll
@@ -53,9 +53,9 @@ define i32 @test(ptr %vf1, i64 %n) {
 ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 204
 ; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK-NEXT: br label %[[EXIT:.*]]
 ; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 204, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
 ; CHECK-NEXT: br label %[[FOR_BODY:.*]]
 ; CHECK: [[FOR_BODY]]:
 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-counting-down.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-counting-down.ll
index a757314ec7a46..d2d99827d5f35 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-counting-down.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-counting-down.ll
@@ -5,7 +5,7 @@
 ; CHECK-LABEL: vector.body:
 ; CHECK-LABEL: middle.block:
-; CHECK-NEXT: br i1 true,
+; CHECK-NEXT: br label %while.end.loopexit

 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll
index 892ddccbc93b4..ddd22bbe1d063 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll
@@ -54,9 +54,9 @@ define void @tail_fold_switch(ptr %dst, i32 %0) {
 ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 8
 ; CHECK-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK-NEXT: br label %[[EXIT:.*]]
 ; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 8, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
 ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
 ; CHECK: [[LOOP_HEADER]]:
 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]

From 195677ae5b24b614ad822b92a928a7b282fcbf94 Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Fri, 14 Feb 2025 20:17:04 +0000
Subject: [PATCH 2/7] !fixup update after rebase

---
 .../Transforms/Vectorize/LoopVectorize.cpp | 15 +-
 .../Transforms/Vectorize/VPlanTransforms.cpp | 61 ++++----
 .../AArch64/clamped-trip-count.ll | 14 +-
 .../AArch64/conditional-branches-cost.ll | 39 +++---
 .../AArch64/divs-with-scalable-vfs.ll | 8 +-
 .../first-order-recurrence-fold-tail.ll | 4 +-
 .../AArch64/induction-costs-sve.ll | 29 ++--
 .../AArch64/low_trip_count_predicates.ll | 5 +-
 .../LoopVectorize/AArch64/masked-call.ll | 97 ++-----------
 .../partial-reduce-dot-product-epilogue.ll | 6 +-
 .../partial-reduce-dot-product-neon.ll | 18 +--
 .../AArch64/partial-reduce-dot-product.ll | 18 +--
 .../AArch64/reduction-recurrence-costs-sve.ll | 20 ++-
 .../AArch64/scalable-strict-fadd.ll | 97 +++++++------
 .../LoopVectorize/AArch64/store-costs-sve.ll | 8 +-
 .../sve-interleaved-masked-accesses.ll | 8 +-
 .../AArch64/sve-tail-folding-forced.ll | 4 +-
 .../sve-tail-folding-overflow-checks.ll | 4 +-
 .../AArch64/sve-tail-folding-reductions.ll | 36 ++---
 .../AArch64/sve-tail-folding-unroll.ll | 8 +-
 .../LoopVectorize/AArch64/sve-tail-folding.ll | 41 +++---
 .../LoopVectorize/AArch64/sve2-histcnt.ll | 2 +-
 .../AArch64/tail-folding-styles.ll | 16 +--
 .../ARM/mve-gather-scatter-tailpred.ll | 16 +--
 .../ARM/mve-hoist-runtime-checks.ll | 4 +-
 .../ARM/mve-reduction-predselect.ll | 16 +--
 .../LoopVectorize/ARM/mve-reduction-types.ll | 54 ++++----
 .../LoopVectorize/RISCV/inloop-reduction.ll | 24 ++--
 .../LoopVectorize/RISCV/low-trip-count.ll | 12 +-
 .../LoopVectorize/RISCV/scalable-tailfold.ll | 26 ++--
 .../truncate-to-minimal-bitwidth-cost.ll | 4 +-
 .../truncate-to-minimal-bitwidth-evl-crash.ll | 4 +-
 .../RISCV/type-info-cache-evl-crash.ll | 4 +-
 .../LoopVectorize/RISCV/uniform-load-store.ll | 56 ++++----
 ...-force-tail-with-evl-bin-unary-ops-args.ll | 72 +++++-----
 ...ize-force-tail-with-evl-call-intrinsics.ll | 36 ++---
 ...ize-force-tail-with-evl-cast-intrinsics.ll | 40 +++---
 ...rize-force-tail-with-evl-cond-reduction.ll | 24 ++--
 ...ce-tail-with-evl-fixed-order-recurrence.ll | 46 +------
 ...ze-force-tail-with-evl-inloop-reduction.ll | 108 +++++++--------
 ...-force-tail-with-evl-intermediate-store.ll | 12 +-
 ...e-force-tail-with-evl-known-no-overflow.ll | 12 +-
 ...vectorize-force-tail-with-evl-reduction.ll | 108 +++++++--------
 ...-force-tail-with-evl-reverse-load-store.ll | 25 ++--
 ...e-force-tail-with-evl-safe-dep-distance.ll | 12 +-
 ...orize-force-tail-with-evl-uniform-store.ll | 5 +-
 .../RISCV/vplan-vp-intrinsics-reduction.ll | 28 +---
 .../predicated-first-order-recurrence.ll | 7 +-
 .../LoopVectorize/X86/cost-model.ll | 6 +-
 .../X86/divs-with-tail-folding.ll | 2 +-
 ...bounds-flags-for-reverse-vector-pointer.ll | 8 +-
 .../LoopVectorize/X86/induction-costs.ll | 4 +-
 .../Transforms/LoopVectorize/X86/optsize.ll | 28 ++--
 .../Transforms/LoopVectorize/X86/pr81872.ll | 17 ++-
 .../X86/scev-checks-unprofitable.ll | 4 +-
 .../LoopVectorize/X86/small-size.ll | 12 +-
 .../LoopVectorize/X86/tail_loop_folding.ll | 14 +-
 ...-order-recurrence-sink-replicate-region.ll | 95 +------------
 .../LoopVectorize/first-order-recurrence.ll | 130 ++++++++----------
 .../Transforms/LoopVectorize/loop-form.ll | 4 +-
 llvm/test/Transforms/LoopVectorize/optsize.ll | 12 +-
 .../pr45679-fold-tail-by-masking.ll | 16 +--
 .../pr46525-expander-insertpoint.ll | 1 -
 .../LoopVectorize/reduction-inloop-pred.ll | 30 ++--
 .../LoopVectorize/reduction-inloop-uf4.ll | 4 +-
 .../LoopVectorize/reduction-predselect.ll | 20 +--
 .../LoopVectorize/select-reduction.ll | 3 +-
 ...e-reduction-results-in-tail-folded-loop.ll | 1 -
 .../strict-fadd-interleave-only.ll | 24 ++--
 .../tail-folding-alloca-in-loop.ll | 2 +-
 .../tail-folding-vectorization-factor-1.ll | 9 +-
 .../vector-loop-backedge-elimination.ll | 30 ++--
 .../vplan-sink-scalars-and-merge.ll | 20 +--
 73 files changed, 764 insertions(+), 1045 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 782e15f1d879f..7e9747c565efd 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2507,7 +2507,8 @@ void InnerLoopVectorizer::introduceCheckBlockInVPlan(BasicBlock *CheckIRBB) {
     auto *ResumePhi = dyn_cast<VPInstruction>(&R);
     if (!ResumePhi || ResumePhi->getOpcode() != VPInstruction::ResumePhi)
       continue;
-    ResumePhi->addOperand(ResumePhi->getOperand(1));
+    ResumePhi->addOperand(
+        ResumePhi->getOperand(ScalarPH->getNumPredecessors() == 1 ? 0 : 1));
   }
 }

@@ -2676,8 +2677,6 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
   LoopScalarPreHeader =
       SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT,
                  LI, nullptr, Twine(Prefix) + "scalar.ph");
-  replaceVPBBWithIRVPBB(Plan.getScalarPreheader(), LoopScalarPreHeader);
-
 }

 /// Return the expanded step for \p ID using \p ExpandedSCEVs to look up SCEV
@@ -2810,6 +2809,7 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton(
   // faster.
   emitMemRuntimeChecks(LoopScalarPreHeader);
+  replaceVPBBWithIRVPBB(Plan.getScalarPreheader(), LoopScalarPreHeader);
   return LoopVectorPreHeader;
 }

@@ -7568,10 +7568,11 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
   // Set PlanForEarlyExitLoop to true if the BestPlan has been built from a
   // loop with an uncountable early exit. The legacy cost model doesn't
   // properly model costs for such loops.
+  auto ExitBlocks = BestPlan.getExitBlocks();
   bool PlanForEarlyExitLoop =
-      BestPlan.getVectorLoopRegion() &&
-      BestPlan.getVectorLoopRegion()->getSingleSuccessor() !=
-          BestPlan.getMiddleBlock();
+      std::distance(ExitBlocks.begin(), ExitBlocks.end()) > 1 ||
+      (std::distance(ExitBlocks.begin(), ExitBlocks.end()) == 1 &&
+       (*ExitBlocks.begin())->getNumPredecessors() > 1);
   assert((BestFactor.Width == LegacyVF.Width || PlanForEarlyExitLoop ||
           planContainsAdditionalSimplifications(getPlanFor(BestFactor.Width),
                                                 CostCtx, OrigLoop) ||
@@ -7884,6 +7885,7 @@ BasicBlock *EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton(
   // Generate the induction variable.
   EPI.VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader);
+  replaceVPBBWithIRVPBB(Plan.getScalarPreheader(), LoopScalarPreHeader);
   return LoopVectorPreHeader;
 }

@@ -8038,6 +8040,7 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
   // resume value for the induction variable comes from the trip count of the
   // main vector loop, passed as the second argument.
   createInductionAdditionalBypassValues(ExpandedSCEVs, EPI.VectorTripCount);
+  replaceVPBBWithIRVPBB(Plan.getScalarPreheader(), LoopScalarPreHeader);
   return LoopVectorPreHeader;
 }

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 7f9c04bc931dd..a5e5e719f4ed1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1468,40 +1468,45 @@ void VPlanTransforms::truncateToMinimalBitwidths(
 }

 /// Remove BranchOnCond recipes with constant conditions together with removing
-/// dead edges to their successors. Remove blocks that become dead (no remaining
-/// predecessors).
+/// dead edges to their successors.
 static void simplifyCFG(VPlan &Plan) {
   using namespace llvm::VPlanPatternMatch;
-  // Start by collecting successor blocks known not to be taken, based on a
-  // branch-on-cond terminator in their predecessor.
-  SmallVector<VPBlockBase *> WorkList;
   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock *>(
            vp_depth_first_deep(Plan.getEntry()))) {
-    VPRecipeBase *Term = VPBB->getTerminator();
-    if (!Term || !match(Term, m_BranchOnCond(m_True())))
+    if (VPBB->getNumSuccessors() != 2 || VPBB->begin() == VPBB->end() ||
+        !match(&VPBB->back(), m_BranchOnCond(m_True())))
       continue;
-    VPBasicBlock *DeadSucc = cast<VPBasicBlock>(VPBB->getSuccessors()[1]);
-    VPBlockUtils::disconnectBlocks(VPBB, DeadSucc);
-    Term->eraseFromParent();
-    WorkList.push_back(DeadSucc);
-  }
-
-  VPValue Tmp;
-  // Remove any block in the worklist if it doesn't have any predecessors left.
-  // Disconnect the block from all successors and queue them for removal.
-  while (!WorkList.empty()) {
-    VPBlockBase *VPBB = WorkList.pop_back_val();
-    if (VPBB->getNumPredecessors() != 0)
-      continue;
-    for (VPBlockBase *Succ : to_vector(VPBB->getSuccessors())) {
-      VPBlockUtils::disconnectBlocks(VPBB, Succ);
-      WorkList.push_back(Succ);
+    VPBasicBlock *DeadSucc = cast<VPBasicBlock>(VPBB->getSuccessors()[1]);
+    const auto &Preds = DeadSucc->getPredecessors();
+    unsigned DeadIdx = std::distance(Preds.begin(), find(Preds, VPBB));
+
+    // Remove values coming from VPBB from phi-like recipes in DeadSucc.
+    for (VPRecipeBase &R : make_early_inc_range(*DeadSucc)) {
+      assert((!isa<VPIRInstruction>(&R) ||
+              !isa<PHINode>(cast<VPIRInstruction>(&R)->getInstruction())) &&
+             !isa<VPHeaderPHIRecipe>(&R) &&
+             "Cannot update VPIRInstructions wrapping phis or header phis yet");
+      auto *VPI = dyn_cast<VPInstruction>(&R);
+      if (VPI && VPI->getOpcode() == VPInstruction::ResumePhi) {
+        VPBuilder B(VPI);
+        SmallVector<VPValue *> NewOps;
+        // Create new operand list, with the dead incoming value filtered out.
+        for (const auto &[Idx, Op] : enumerate(VPI->operands())) {
+          if (Idx == DeadIdx)
+            continue;
+          NewOps.push_back(Op);
+        }
+        VPI->replaceAllUsesWith(B.createNaryOp(VPInstruction::ResumePhi, NewOps,
+                                               VPI->getDebugLoc(),
+                                               VPI->getName()));
+        VPI->eraseFromParent();
+      }
     }
-    // Replace all uses of values defined in VPBB with a dummy VPValue to
-    // facilitate removal. All blocks with remaining uses of the dummy value
-    // must be removed by subsequent operations.
-    VPBB->dropAllReferences(&Tmp);
-    delete VPBB;
+    // Disconnect blocks and remove the terminator. DeadSucc will be deleted
+    // automatically on VPlan destruction.
+ VPBlockUtils::disconnectBlocks(VPBB, DeadSucc); + VPBB->back().eraseFromParent(); } } @@ -1514,11 +1519,11 @@ void VPlanTransforms::optimize(VPlan &Plan) { runPass(legalizeAndOptimizeInductions, Plan); runPass(removeRedundantExpandSCEVRecipes, Plan); runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType()); + simplifyCFG(Plan); runPass(removeDeadRecipes, Plan); runPass(createAndOptimizeReplicateRegions, Plan); runPass(mergeBlocksIntoPredecessors, Plan); - runPass(simplifyCFG,Plan); runPass(licm, Plan); } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll index 3f2aef84a6a4c..36d4ee8c80d3a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll @@ -15,7 +15,6 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8 -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 8) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[VAL]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -42,10 +41,10 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 8) ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DST]], [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[DST]], [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -101,7 +100,6 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8 -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]]) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[VAL]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -128,10 +126,10 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 [[WIDE_TRIP_COUNT]]) ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; 
CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DST]], [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[DST]], [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index ce24d3cfded22..ec7b7fd6bc49a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -460,9 +460,9 @@ define void @latch_branch_cost(ptr %dst) { ; PRED-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 104 ; PRED-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; PRED: middle.block: -; PRED-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; PRED-NEXT: br label [[EXIT:%.*]] ; PRED: scalar.ph: -; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 104, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; PRED-NEXT: br label [[FOR_BODY:%.*]] ; PRED: loop: ; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -470,7 +470,7 @@ define void @latch_branch_cost(ptr %dst) { ; PRED-NEXT: store i8 0, ptr [[GEP]], align 1 ; PRED-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[IV]], 1 ; PRED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100 -; PRED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; PRED-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; PRED: exit: ; PRED-NEXT: ret void ; @@ -713,9 +713,9 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt ; PRED-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP28]], i32 0 ; PRED-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; PRED: middle.block: -; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; PRED-NEXT: br label [[EXIT:%.*]] ; PRED: scalar.ph: -; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; PRED-NEXT: br label [[LOOP_HEADER:%.*]] ; PRED: loop.header: ; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] @@ -821,9 +821,6 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 { ; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; PRED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; PRED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 -; PRED-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 8 -; PRED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]] -; PRED-NEXT: [[IND_END1:%.*]] = mul i64 [[N_VEC]], 2 ; 
PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() ; PRED-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 2 ; PRED-NEXT: [[TMP8:%.*]] = sub i64 257, [[TMP7]] @@ -850,10 +847,10 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 { ; PRED-NEXT: [[TMP17:%.*]] = extractelement [[TMP16]], i32 0 ; PRED-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; PRED: middle.block: -; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; PRED-NEXT: br label [[EXIT:%.*]] ; PRED: scalar.ph: -; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DST]], [[ENTRY:%.*]] ] -; PRED-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[DST]], [[ENTRY:%.*]] ] +; PRED-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ 0, [[ENTRY]] ] ; PRED-NEXT: br label [[LOOP:%.*]] ; PRED: loop: ; PRED-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ] @@ -978,9 +975,9 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; DEFAULT-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; DEFAULT: middle.block: -; DEFAULT-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; DEFAULT-NEXT: br label [[EXIT:%.*]] ; DEFAULT: scalar.ph: -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 8, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; DEFAULT-NEXT: br label [[LOOP:%.*]] ; DEFAULT: loop: ; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -1080,9 +1077,9 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; PRED-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; PRED: middle.block: -; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; PRED-NEXT: br label [[EXIT:%.*]] ; PRED: scalar.ph: -; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 8, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; PRED-NEXT: br label [[LOOP:%.*]] ; PRED: loop: ; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -1517,9 +1514,9 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; PRED-NEXT: [[TMP85:%.*]] = extractelement <8 x i1> [[TMP84]], i32 0 ; PRED-NEXT: br i1 [[TMP85]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; PRED: middle.block: -; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; PRED-NEXT: br label [[EXIT:%.*]] ; PRED: scalar.ph: -; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; PRED-NEXT: br label [[LOOP_HEADER:%.*]] ; PRED: loop.header: ; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] @@ -1630,9 +1627,9 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) optsize { ; DEFAULT-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24 ; DEFAULT-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label 
[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; DEFAULT: middle.block: -; DEFAULT-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; DEFAULT-NEXT: br label [[EXIT:%.*]] ; DEFAULT: scalar.ph: -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 24, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; DEFAULT-NEXT: br label [[LOOP_HEADER:%.*]] ; DEFAULT: loop.header: ; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] @@ -1693,9 +1690,9 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) optsize { ; PRED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24 ; PRED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] ; PRED: middle.block: -; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; PRED-NEXT: br label [[EXIT:%.*]] ; PRED: scalar.ph: -; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 24, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; PRED-NEXT: br label [[LOOP_HEADER:%.*]] ; PRED: loop.header: ; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll index a02987f86ae9c..6b61c12d5d6f7 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll @@ -160,9 +160,9 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK-NEXT: [[TMP37:%.*]] = extractelement [[TMP36]], i32 0 ; CHECK-NEXT: br i1 [[TMP37]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] @@ -287,9 +287,9 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) { ; CHECK-NEXT: [[TMP48:%.*]] = extractelement [[TMP47]], i32 0 ; CHECK-NEXT: br i1 [[TMP48]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll index 2cacbc4c8446c..30e454d6e3b13 100644 --- 
a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll @@ -70,14 +70,14 @@ define i32 @test_phi_iterator_invalidation(ptr %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP30]], i32 0 ; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP24]], i32 3 ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ 0, [[SCALAR_PH]] ], [ [[FOR_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[SEXT:%.*]] = sext i16 [[SCALAR_RECUR]] to i32 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV_NEXT]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll index 69794b9798f1c..c0f3e67d29658 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll @@ -144,9 +144,9 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; PRED-NEXT: [[TMP29:%.*]] = extractelement [[TMP28]], i32 0 ; PRED-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; PRED: middle.block: -; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; PRED-NEXT: br label [[EXIT:%.*]] ; PRED: scalar.ph: -; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; PRED-NEXT: br label [[LOOP:%.*]] ; PRED: loop: ; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -316,9 +316,9 @@ define void @iv_trunc(i32 %x, ptr %dst, i64 %N) #0 { ; PRED-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP24]], i32 0 ; PRED-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; PRED: middle.block: -; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; PRED-NEXT: br label [[EXIT:%.*]] ; PRED: scalar.ph: -; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; PRED-NEXT: br label [[FOR_BODY:%.*]] ; PRED: for.body: ; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -453,7 +453,6 @@ define void @trunc_ivs_and_store(i32 %x, ptr %dst, i64 %N) #0 { ; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], 3 ; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 4 ; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; PRED-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i32 ; PRED-NEXT: [[TMP14:%.*]] = sub i64 [[TMP0]], 4 ; 
PRED-NEXT: [[TMP15:%.*]] = icmp ugt i64 [[TMP0]], 4 ; PRED-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i64 [[TMP14]], i64 0 @@ -511,10 +510,10 @@ define void @trunc_ivs_and_store(i32 %x, ptr %dst, i64 %N) #0 { ; PRED-NEXT: [[TMP36:%.*]] = extractelement <4 x i1> [[TMP35]], i32 0 ; PRED-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; PRED: middle.block: -; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; PRED-NEXT: br label [[EXIT:%.*]] ; PRED: scalar.ph: -; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; PRED-NEXT: [[BC_RESUME_VAL8:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; PRED-NEXT: [[BC_RESUME_VAL8:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; PRED-NEXT: br label [[LOOP:%.*]] ; PRED: loop: ; PRED-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ] @@ -650,7 +649,6 @@ define void @ivs_trunc_and_ext(i32 %x, ptr %dst, i64 %N) #0 { ; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], 3 ; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 4 ; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; PRED-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i32 ; PRED-NEXT: [[TMP13:%.*]] = sub i64 [[TMP0]], 4 ; PRED-NEXT: [[TMP14:%.*]] = icmp ugt i64 [[TMP0]], 4 ; PRED-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i64 [[TMP13]], i64 0 @@ -708,10 +706,10 @@ define void @ivs_trunc_and_ext(i32 %x, ptr %dst, i64 %N) #0 { ; PRED-NEXT: [[TMP35:%.*]] = extractelement <4 x i1> [[TMP34]], i32 0 ; PRED-NEXT: br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; PRED: middle.block: -; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; PRED-NEXT: br label [[EXIT:%.*]] ; PRED: scalar.ph: -; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; PRED-NEXT: [[BC_RESUME_VAL7:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; PRED-NEXT: [[BC_RESUME_VAL7:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; PRED-NEXT: br label [[LOOP:%.*]] ; PRED: loop: ; PRED-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ] @@ -820,7 +818,6 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) { ; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 2 ; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; PRED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[UMAX1]], 1 -; PRED-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i32 ; PRED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 ; PRED-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer ; PRED-NEXT: br label [[VECTOR_BODY:%.*]] @@ -850,10 +847,10 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) { ; PRED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; PRED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; PRED: middle.block: -; PRED-NEXT: br i1 true, label [[EXIT:%.*]], 
label [[SCALAR_PH]] +; PRED-NEXT: br label [[EXIT:%.*]] ; PRED: scalar.ph: -; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; PRED-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; PRED-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; PRED-NEXT: br label [[LOOP:%.*]] ; PRED: loop: ; PRED-NEXT: [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll index 9b7e41aa98db6..55598d7e7570c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll @@ -430,7 +430,6 @@ define void @overflow_indvar_known_false(ptr nocapture noundef %p, i32 noundef % ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 16 -; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 [[TMP1]]) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i8 [[CONV]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -451,9 +450,9 @@ define void @overflow_indvar_known_false(ptr nocapture noundef %p, i32 noundef % ; CHECK-NEXT: [[TMP17:%.*]] = extractelement [[TMP16]], i32 0 ; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br i1 true, label %[[WHILE_END_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; CHECK-NEXT: br label %[[WHILE_END_LOOPEXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[TMP0]], %[[WHILE_PREHEADER]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP0]], %[[WHILE_PREHEADER]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ] ; CHECK-NEXT: br label %[[WHILE_BODY:.*]] ; CHECK: [[WHILE_BODY]]: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[WHILE_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll index 07873fba86b6d..20e02d253b1c5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll @@ -50,12 +50,6 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; ; TFCOMMON-LABEL: @test_widen( ; TFCOMMON-NEXT: entry: -; TFCOMMON-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TFCOMMON-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; TFCOMMON-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; TFCOMMON-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] -; TFCOMMON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; TFCOMMON-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; TFCOMMON-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; TFCOMMON-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 ; TFCOMMON-NEXT: 
[[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) @@ -78,12 +72,6 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; ; TFA_INTERLEAVE-LABEL: @test_widen( ; TFA_INTERLEAVE-NEXT: entry: -; TFA_INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 -; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; TFA_INTERLEAVE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] -; TFA_INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; TFA_INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 ; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() @@ -190,12 +178,6 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; ; TFCOMMON-LABEL: @test_if_then( ; TFCOMMON-NEXT: entry: -; TFCOMMON-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TFCOMMON-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; TFCOMMON-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; TFCOMMON-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] -; TFCOMMON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; TFCOMMON-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; TFCOMMON-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; TFCOMMON-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) @@ -221,12 +203,6 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; ; TFA_INTERLEAVE-LABEL: @test_if_then( ; TFA_INTERLEAVE-NEXT: entry: -; TFA_INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 -; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; TFA_INTERLEAVE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] -; TFA_INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; TFA_INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 ; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() @@ -355,12 +331,6 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; ; TFCOMMON-LABEL: @test_widen_if_then_else( ; TFCOMMON-NEXT: entry: -; TFCOMMON-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TFCOMMON-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; TFCOMMON-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; TFCOMMON-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] -; TFCOMMON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; TFCOMMON-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; TFCOMMON-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; TFCOMMON-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) @@ -389,12 +359,6 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; ; TFA_INTERLEAVE-LABEL: @test_widen_if_then_else( ; TFA_INTERLEAVE-NEXT: entry: -; TFA_INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 -; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; TFA_INTERLEAVE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] -; TFA_INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 
[[N_RND_UP]], [[TMP1]] -; TFA_INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 ; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() @@ -644,12 +608,6 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; ; TFALWAYS-LABEL: @test_widen_optmask( ; TFALWAYS-NEXT: entry: -; TFALWAYS-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TFALWAYS-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; TFALWAYS-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; TFALWAYS-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] -; TFALWAYS-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; TFALWAYS-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; TFALWAYS-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; TFALWAYS-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 ; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) @@ -672,12 +630,6 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; ; TFFALLBACK-LABEL: @test_widen_optmask( ; TFFALLBACK-NEXT: entry: -; TFFALLBACK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TFFALLBACK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; TFFALLBACK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; TFFALLBACK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] -; TFFALLBACK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; TFFALLBACK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; TFFALLBACK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; TFFALLBACK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 ; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) @@ -700,12 +652,6 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; ; TFA_INTERLEAVE-LABEL: @test_widen_optmask( ; TFA_INTERLEAVE-NEXT: entry: -; TFA_INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 -; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; TFA_INTERLEAVE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] -; TFA_INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; TFA_INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 ; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() @@ -817,12 +763,6 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; ; TFALWAYS-LABEL: @test_widen_fmuladd_and_call( ; TFALWAYS-NEXT: entry: -; TFALWAYS-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TFALWAYS-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; TFALWAYS-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; TFALWAYS-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] -; TFALWAYS-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; TFALWAYS-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; TFALWAYS-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; TFALWAYS-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 ; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) @@ -852,12 +792,6 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; ; TFFALLBACK-LABEL: @test_widen_fmuladd_and_call( ; TFFALLBACK-NEXT: entry: -; TFFALLBACK-NEXT: [[TMP0:%.*]] = call i64 
@llvm.vscale.i64() -; TFFALLBACK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; TFFALLBACK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; TFFALLBACK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] -; TFFALLBACK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; TFFALLBACK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; TFFALLBACK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; TFFALLBACK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 ; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) @@ -887,12 +821,6 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; ; TFA_INTERLEAVE-LABEL: @test_widen_fmuladd_and_call( ; TFA_INTERLEAVE-NEXT: entry: -; TFA_INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 -; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; TFA_INTERLEAVE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] -; TFA_INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; TFA_INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 ; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() @@ -1020,14 +948,11 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; ; TFALWAYS-LABEL: @test_widen_exp_v2( ; TFALWAYS-NEXT: entry: -; TFALWAYS-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], 1 -; TFALWAYS-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], 1 -; TFALWAYS-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 2 -; TFALWAYS-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; TFALWAYS-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 2 -; TFALWAYS-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[TMP0]], 2 +; TFALWAYS-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0:%.*]], 1 +; TFALWAYS-NEXT: [[TMP1:%.*]] = sub i64 [[N_RND_UP]], 2 +; TFALWAYS-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[N_RND_UP]], 2 ; TFALWAYS-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i64 [[TMP1]], i64 0 -; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 0, i64 [[TMP0]]) +; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 0, i64 [[N_RND_UP]]) ; TFALWAYS-NEXT: br label [[VECTOR_BODY:%.*]] ; TFALWAYS: vector.body: ; TFALWAYS-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] @@ -1065,14 +990,11 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; ; TFFALLBACK-LABEL: @test_widen_exp_v2( ; TFFALLBACK-NEXT: entry: -; TFFALLBACK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], 1 -; TFFALLBACK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], 1 -; TFFALLBACK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 2 -; TFFALLBACK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; TFFALLBACK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 2 -; TFFALLBACK-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[TMP0]], 2 +; TFFALLBACK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0:%.*]], 1 +; TFFALLBACK-NEXT: [[TMP1:%.*]] = sub i64 [[N_RND_UP]], 2 +; TFFALLBACK-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[N_RND_UP]], 2 ; TFFALLBACK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i64 [[TMP1]], i64 0 -; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 0, i64 [[TMP0]]) +; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <2 x i1> 
@llvm.get.active.lane.mask.v2i1.i64(i64 0, i64 [[N_RND_UP]]) ; TFFALLBACK-NEXT: br label [[VECTOR_BODY:%.*]] ; TFFALLBACK: vector.body: ; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] @@ -1111,9 +1033,6 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFA_INTERLEAVE-LABEL: @test_widen_exp_v2( ; TFA_INTERLEAVE-NEXT: entry: ; TFA_INTERLEAVE-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], 1 -; TFA_INTERLEAVE-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], 3 -; TFA_INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 4 -; TFA_INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 4 ; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[TMP0]], 4 ; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i64 [[TMP1]], i64 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll index 85c9536dfbc43..456ee4208216e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll @@ -563,10 +563,10 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) { ; CHECK-NEXT: br i1 [[TMP181]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP182:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]]) -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP182]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll index 54dc6ce5d3a92..6461ffb8c7beb 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll @@ -1724,10 +1724,10 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) { ; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP181]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK-INTERLEAVE1: middle.block: ; CHECK-INTERLEAVE1-NEXT: [[TMP182:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]]) -; CHECK-INTERLEAVE1-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-INTERLEAVE1-NEXT: br label [[EXIT:%.*]] ; CHECK-INTERLEAVE1: scalar.ph: -; CHECK-INTERLEAVE1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-INTERLEAVE1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP182]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-INTERLEAVE1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; CHECK-INTERLEAVE1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; CHECK-INTERLEAVE1-NEXT: br label [[FOR_BODY:%.*]] ; 
CHECK-INTERLEAVE1: for.body: ; CHECK-INTERLEAVE1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -2079,10 +2079,10 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP181]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK-INTERLEAVED: middle.block: ; CHECK-INTERLEAVED-NEXT: [[TMP182:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]]) -; CHECK-INTERLEAVED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-INTERLEAVED-NEXT: br label [[EXIT:%.*]] ; CHECK-INTERLEAVED: scalar.ph: -; CHECK-INTERLEAVED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-INTERLEAVED-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP182]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-INTERLEAVED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; CHECK-INTERLEAVED-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; CHECK-INTERLEAVED-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-INTERLEAVED: for.body: ; CHECK-INTERLEAVED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -2434,10 +2434,10 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) { ; CHECK-MAXBW-NEXT: br i1 [[TMP181]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK-MAXBW: middle.block: ; CHECK-MAXBW-NEXT: [[TMP182:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]]) -; CHECK-MAXBW-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-MAXBW-NEXT: br label [[EXIT:%.*]] ; CHECK-MAXBW: scalar.ph: -; CHECK-MAXBW-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-MAXBW-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP182]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-MAXBW-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; CHECK-MAXBW-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; CHECK-MAXBW-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-MAXBW: for.body: ; CHECK-MAXBW-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll index a734a0e94198a..7fa4cf61b0841 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll @@ -2143,10 +2143,10 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-INTERLEAVE1: middle.block: ; CHECK-INTERLEAVE1-NEXT: [[TMP22:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP19]]) -; CHECK-INTERLEAVE1-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-INTERLEAVE1-NEXT: br label [[EXIT:%.*]] ; CHECK-INTERLEAVE1: scalar.ph: -; CHECK-INTERLEAVE1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-INTERLEAVE1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP22]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-INTERLEAVE1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; CHECK-INTERLEAVE1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; CHECK-INTERLEAVE1-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-INTERLEAVE1: 
for.body: ; CHECK-INTERLEAVE1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -2209,10 +2209,10 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-INTERLEAVED: middle.block: ; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP19]]) -; CHECK-INTERLEAVED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-INTERLEAVED-NEXT: br label [[EXIT:%.*]] ; CHECK-INTERLEAVED: scalar.ph: -; CHECK-INTERLEAVED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-INTERLEAVED-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP22]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-INTERLEAVED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; CHECK-INTERLEAVED-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; CHECK-INTERLEAVED-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-INTERLEAVED: for.body: ; CHECK-INTERLEAVED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -2275,10 +2275,10 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-MAXBW-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-MAXBW: middle.block: ; CHECK-MAXBW-NEXT: [[TMP21:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[PARTIAL_REDUCE]]) -; CHECK-MAXBW-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-MAXBW-NEXT: br label [[EXIT:%.*]] ; CHECK-MAXBW: scalar.ph: -; CHECK-MAXBW-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-MAXBW-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP21]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-MAXBW-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; CHECK-MAXBW-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; CHECK-MAXBW-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-MAXBW: for.body: ; CHECK-MAXBW-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll index ce2a5af52d442..953844c9bfdea 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll @@ -254,16 +254,12 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; PRED-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]] ; PRED: middle.block: ; PRED-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32( [[TMP41]]) -; PRED-NEXT: [[TMP45:%.*]] = call i32 @llvm.vscale.i32() -; PRED-NEXT: [[TMP46:%.*]] = mul i32 [[TMP45]], 4 -; PRED-NEXT: [[TMP47:%.*]] = sub i32 [[TMP46]], 1 -; PRED-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement [[TMP29]], i32 [[TMP47]] -; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; PRED-NEXT: br label [[EXIT:%.*]] ; PRED: scalar.ph: -; PRED-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP28]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ] -; PRED-NEXT: [[SCALAR_RECUR_INIT8:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1]] ] -; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] 
], [ 0, [[ENTRY1]] ] -; PRED-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP44]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1]] ] +; PRED-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY1:%.*]] ] +; PRED-NEXT: [[SCALAR_RECUR_INIT8:%.*]] = phi i32 [ 0, [[ENTRY1]] ] +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY1]] ] +; PRED-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY1]] ] ; PRED-NEXT: br label [[LOOP1:%.*]] ; PRED: loop: ; PRED-NEXT: [[TMP48:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP53:%.*]], [[LOOP1]] ] @@ -501,10 +497,10 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 { ; PRED-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; PRED: middle.block: ; PRED-NEXT: [[TMP22:%.*]] = call i16 @llvm.vector.reduce.or.nxv4i16( [[TMP16]]) -; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; PRED-NEXT: br label [[EXIT:%.*]] ; PRED: scalar.ph: -; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; PRED-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ [[TMP22]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; PRED-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, [[ENTRY]] ] ; PRED-NEXT: br label [[LOOP:%.*]] ; PRED: loop: ; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll index 61729c5616faf..fcc9827c475b0 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll @@ -159,10 +159,10 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = extractelement [[TMP15]], i32 0 ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: -; CHECK-ORDERED-TF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]] ; CHECK-ORDERED-TF: scalar.ph: -; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ] ; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED-TF: for.body: ; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -426,10 +426,10 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = extractelement [[TMP45]], i32 0 ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: -; CHECK-ORDERED-TF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]] ; CHECK-ORDERED-TF: scalar.ph: -; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP35]], [[MIDDLE_BLOCK]] ], [ 
0.000000e+00, [[ENTRY]] ] +; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ] ; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED-TF: for.body: ; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -651,60 +651,59 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 -; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = mul i64 [[N_VEC]], 2 -; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 -; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = sub i64 [[TMP2]], [[TMP10]] -; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = icmp ugt i64 [[TMP2]], [[TMP10]] -; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i64 [[TMP11]], i64 0 +; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 +; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = sub i64 [[TMP2]], [[TMP9]] +; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP2]], [[TMP9]] +; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i64 [[TMP10]], i64 0 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[TMP2]]) ; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-ORDERED-TF: vector.body: ; CHECK-ORDERED-TF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ [[A2]], [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[VEC_PHI1:%.*]] = phi float [ [[A1]], [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ [[A2]], [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-TF-NEXT: [[VEC_PHI1:%.*]] = phi float [ [[A1]], [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP14]] +; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]] ; CHECK-ORDERED-TF-NEXT: [[INTERLEAVED_MASK:%.*]] = call @llvm.vector.interleave2.nxv8i1( [[ACTIVE_LANE_MASK]], [[ACTIVE_LANE_MASK]]) -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_VEC:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP15]], i32 4, [[INTERLEAVED_MASK]], poison) +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_VEC:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP14]], i32 4, [[INTERLEAVED_MASK]], poison) ; CHECK-ORDERED-TF-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.vector.deinterleave2.nxv8f32( [[WIDE_MASKED_VEC]]) -; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 -; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 -; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = select 
[[ACTIVE_LANE_MASK]], [[TMP17]], splat (float -0.000000e+00) -; CHECK-ORDERED-TF-NEXT: [[TMP19]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP18]]) -; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP16]], splat (float -0.000000e+00) -; CHECK-ORDERED-TF-NEXT: [[TMP21]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI1]], [[TMP20]]) +; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 +; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 +; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP16]], splat (float -0.000000e+00) +; CHECK-ORDERED-TF-NEXT: [[TMP18]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP17]]) +; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP15]], splat (float -0.000000e+00) +; CHECK-ORDERED-TF-NEXT: [[TMP20]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI1]], [[TMP19]]) ; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]] -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP13]]) -; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = extractelement [[TMP22]], i32 0 -; CHECK-ORDERED-TF-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP12]]) +; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = extractelement [[TMP21]], i32 0 +; CHECK-ORDERED-TF-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: -; CHECK-ORDERED-TF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]] ; CHECK-ORDERED-TF: scalar.ph: -; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP19]], [[MIDDLE_BLOCK]] ], [ [[A2]], [[ENTRY:%.*]] ] -; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX2:%.*]] = phi float [ [[TMP21]], [[MIDDLE_BLOCK]] ], [ [[A1]], [[ENTRY]] ] -; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[A2]], [[ENTRY:%.*]] ] +; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX2:%.*]] = phi float [ [[A1]], [[ENTRY]] ] +; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY]] ] ; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED-TF: for.body: ; CHECK-ORDERED-TF-NEXT: [[ADD_PHI1:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD2:%.*]], [[FOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[ADD_PHI2:%.*]] = phi float [ [[BC_MERGE_RDX2]], [[SCALAR_PH]] ], [ [[ADD1:%.*]], [[FOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[ARRAYIDXB1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]] -; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDXB1]], align 4 -; CHECK-ORDERED-TF-NEXT: [[ADD1]] = fadd float [[TMP24]], [[ADD_PHI2]] +; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDXB1]], align 4 +; CHECK-ORDERED-TF-NEXT: [[ADD1]] = fadd float [[TMP23]], [[ADD_PHI2]] ; CHECK-ORDERED-TF-NEXT: 
[[OR:%.*]] = or disjoint i64 [[IV]], 1 ; CHECK-ORDERED-TF-NEXT: [[ARRAYIDXB2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[OR]] -; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDXB2]], align 4 -; CHECK-ORDERED-TF-NEXT: [[ADD2]] = fadd float [[TMP25]], [[ADD_PHI1]] +; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDXB2]], align 4 +; CHECK-ORDERED-TF-NEXT: [[ADD2]] = fadd float [[TMP24]], [[ADD_PHI1]] ; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 2 ; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK-ORDERED-TF: for.end: -; CHECK-ORDERED-TF-NEXT: [[ADD1_LCSSA:%.*]] = phi float [ [[ADD1]], [[FOR_BODY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ] -; CHECK-ORDERED-TF-NEXT: [[ADD2_LCSSA:%.*]] = phi float [ [[ADD2]], [[FOR_BODY]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-TF-NEXT: [[ADD1_LCSSA:%.*]] = phi float [ [[ADD1]], [[FOR_BODY]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-TF-NEXT: [[ADD2_LCSSA:%.*]] = phi float [ [[ADD2]], [[FOR_BODY]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-TF-NEXT: store float [[ADD1_LCSSA]], ptr [[A]], align 4 ; CHECK-ORDERED-TF-NEXT: store float [[ADD2_LCSSA]], ptr [[ARRAYIDXA]], align 4 ; CHECK-ORDERED-TF-NEXT: ret void @@ -937,10 +936,10 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu ; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = extractelement [[TMP19]], i32 0 ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: -; CHECK-ORDERED-TF-NEXT: br i1 true, label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-ORDERED-TF-NEXT: br label [[FOR_END_LOOPEXIT:%.*]] ; CHECK-ORDERED-TF: scalar.ph: -; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[FOR_BODY_PREHEADER]] ] +; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_PREHEADER]] ] ; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED-TF: for.body: ; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -1176,10 +1175,10 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = extractelement [[TMP19]], i32 0 ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: -; CHECK-ORDERED-TF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]] ; CHECK-ORDERED-TF: scalar.ph: -; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[ENTRY]] ] +; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ] ; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED-TF: for.body: ; CHECK-ORDERED-TF-NEXT: 
[[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] @@ -1663,10 +1662,10 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[TMP61:%.*]] = extractelement [[TMP60]], i32 0 ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP61]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: -; CHECK-ORDERED-TF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]] ; CHECK-ORDERED-TF: scalar.ph: -; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP50]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ] ; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED-TF: for.body: ; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -1994,10 +1993,10 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[TMP61:%.*]] = extractelement [[TMP60]], i32 0 ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP61]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: -; CHECK-ORDERED-TF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]] ; CHECK-ORDERED-TF: scalar.ph: -; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP50]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ] ; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED-TF: for.body: ; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll index 5aad94a67aa58..d33c0c2a54698 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll @@ -115,9 +115,9 @@ define void @cost_store_i8(ptr %dst) #0 { ; PRED-NEXT: [[TMP16:%.*]] = extractelement [[TMP15]], i32 0 ; PRED-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; PRED: middle.block: -; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; PRED-NEXT: br label [[EXIT:%.*]] ; PRED: scalar.ph: -; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; PRED-NEXT: br label [[LOOP:%.*]] ; PRED: loop: ; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -266,9 +266,9 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 { ; PRED-NEXT: [[TMP13:%.*]] = extractelement [[TMP12]], i32 0 ; PRED-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; PRED: middle.block: -; PRED-NEXT: br i1 true, 
label [[EXIT:%.*]], label [[SCALAR_PH]] +; PRED-NEXT: br label [[EXIT:%.*]] ; PRED: scalar.ph: -; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; PRED-NEXT: br label [[LOOP:%.*]] ; PRED: loop: ; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll index f2e0c9be2defe..419b0e11eb3a3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll @@ -146,7 +146,7 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP17]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP0:![0-9]+]] ; PREDICATED_TAIL_FOLDING: middle.block: -; PREDICATED_TAIL_FOLDING-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_END:%.*]] ; PREDICATED_TAIL_FOLDING: scalar.ph: ; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_BODY:%.*]] ; PREDICATED_TAIL_FOLDING: for.body: @@ -308,7 +308,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP14]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP4:![0-9]+]] ; PREDICATED_TAIL_FOLDING: middle.block: -; PREDICATED_TAIL_FOLDING-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_END:%.*]] ; PREDICATED_TAIL_FOLDING: scalar.ph: ; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_BODY:%.*]] ; PREDICATED_TAIL_FOLDING: for.body: @@ -479,7 +479,7 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP16]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP6:![0-9]+]] ; PREDICATED_TAIL_FOLDING: middle.block: -; PREDICATED_TAIL_FOLDING-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_END:%.*]] ; PREDICATED_TAIL_FOLDING: scalar.ph: ; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_BODY:%.*]] ; PREDICATED_TAIL_FOLDING: for.body: @@ -719,7 +719,7 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p, ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP23:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP23]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP8:![0-9]+]] ; PREDICATED_TAIL_FOLDING: middle.block: -; PREDICATED_TAIL_FOLDING-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_END:%.*]] ; PREDICATED_TAIL_FOLDING: scalar.ph: ; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_BODY:%.*]] ; PREDICATED_TAIL_FOLDING: for.body: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll 
b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll index 1f7d0b745f929..fc7bb0da30b5d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll @@ -75,9 +75,9 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <vscale x 4 x i1> [[TMP15]], i32 0 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll index f61d473a12ddb..960506d4ee199 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll @@ -32,7 +32,7 @@ define void @cannot_overflow_i32_induction_var(ptr noalias %dst, ptr readonly %s ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: @@ -96,7 +96,7 @@ define void @can_overflow_i64_induction_var(ptr noalias %dst, ptr readonly %src, ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; CHECK-NEXT: br i1 [[TMP8]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll index bd8fe69a44630..86713dc6fb9f0 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll @@ -44,10 +44,10 @@ define i32 @add_reduction_i32(ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP14]]) -; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, 
[[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -99,10 +99,10 @@ define i32 @add_reduction_i32(ptr %ptr, i64 %n) #0 { ; CHECK-IN-LOOP-NEXT: [[TMP19:%.*]] = extractelement <vscale x 4 x i1> [[TMP18]], i32 0 ; CHECK-IN-LOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-IN-LOOP: middle.block: -; CHECK-IN-LOOP-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-IN-LOOP-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK-IN-LOOP: scalar.ph: -; CHECK-IN-LOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-IN-LOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP15]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-IN-LOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; CHECK-IN-LOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; CHECK-IN-LOOP-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK-IN-LOOP: while.body: ; CHECK-IN-LOOP-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -171,10 +171,10 @@ define float @add_reduction_f32(ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <vscale x 4 x i1> [[TMP17]], i32 0 ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ] ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -225,10 +225,10 @@ define float @add_reduction_f32(ptr %ptr, i64 %n) #0 { ; CHECK-IN-LOOP-NEXT: [[TMP18:%.*]] = extractelement <vscale x 4 x i1> [[TMP17]], i32 0 ; CHECK-IN-LOOP-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-IN-LOOP: middle.block: -; CHECK-IN-LOOP-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-IN-LOOP-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK-IN-LOOP: scalar.ph: -; CHECK-IN-LOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-IN-LOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; CHECK-IN-LOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; CHECK-IN-LOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ] ; CHECK-IN-LOOP-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK-IN-LOOP: while.body: ; CHECK-IN-LOOP-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -303,10 +303,10 @@ define i32 @cond_xor_reduction(ptr noalias %a, ptr noalias %cond, i64 %N) #0 { ; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: 
[[TMP25:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> [[TMP20]]) -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP25]], [[MIDDLE_BLOCK]] ], [ 7, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 7, [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] @@ -370,10 +370,10 @@ define i32 @cond_xor_reduction(ptr noalias %a, ptr noalias %cond, i64 %N) #0 { ; CHECK-IN-LOOP-NEXT: [[TMP23:%.*]] = extractelement <vscale x 4 x i1> [[TMP22]], i32 0 ; CHECK-IN-LOOP-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-IN-LOOP: middle.block: -; CHECK-IN-LOOP-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-IN-LOOP-NEXT: br label [[FOR_END:%.*]] ; CHECK-IN-LOOP: scalar.ph: -; CHECK-IN-LOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-IN-LOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP19]], [[MIDDLE_BLOCK]] ], [ 7, [[ENTRY]] ] +; CHECK-IN-LOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; CHECK-IN-LOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 7, [[ENTRY]] ] ; CHECK-IN-LOOP-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-IN-LOOP: for.body: ; CHECK-IN-LOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll index 3f33ee883ac48..b8657ca0bb7ca 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll @@ -79,9 +79,9 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[TMP76:%.*]] = extractelement <vscale x 4 x i1> [[TMP72]], i32 0 ; CHECK-NEXT: br i1 [[TMP76]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -206,9 +206,9 @@ define void @cond_memset(i32 %val, ptr noalias readonly %cond_ptr, ptr noalias % ; CHECK-NEXT: [[TMP98:%.*]] = extractelement <vscale x 4 x i1> [[TMP94]], i32 0 ; CHECK-NEXT: br i1 [[TMP98]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[INDEX:%.*]] 
= phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_END:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll index 75b2df93c9350..fba79b71e88ff 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll @@ -40,9 +40,9 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <vscale x 4 x i1> [[TMP13]], i32 0 ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -99,9 +99,9 @@ define void @simple_memset_v4i32(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -166,9 +166,9 @@ define void @simple_memcpy(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <vscale x 4 x i1> [[TMP15]], i32 0 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -217,7 +217,6 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 -; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 ; CHECK-NEXT: [[TMP10:%.*]] = sub i64 [[TMP2]], [[TMP9]] @@ -246,9 +245,9 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <vscale x 4 x i1> [[TMP21]], i32 0 ; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 
true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -318,9 +317,9 @@ define void @simple_gather_scatter(ptr noalias %dst, ptr noalias %src, ptr noali ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <vscale x 4 x i1> [[TMP15]], i32 0 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -394,9 +393,9 @@ define void @uniform_load(ptr noalias %dst, ptr noalias readonly %src, i64 %n) # ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 4 x i1> [[TMP14]], i32 0 ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -474,9 +473,9 @@ define void @cond_uniform_load(ptr noalias %dst, ptr noalias readonly %src, ptr ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <vscale x 4 x i1> [[TMP18]], i32 0 ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[IF_END:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -563,9 +562,9 @@ define void @uniform_store(ptr noalias %dst, ptr noalias readonly %src, i64 %n) ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <vscale x 4 x i1> [[TMP13]], i32 0 ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], 
[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -635,9 +634,9 @@ define void @simple_fdiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <vscale x 4 x i1> [[TMP16]], i32 0 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -714,9 +713,9 @@ define void @simple_idiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <vscale x 4 x i1> [[TMP17]], i32 0 ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll index 3b00312959d8a..6fa6533673100 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll @@ -703,7 +703,7 @@ define void @simple_histogram_tailfold(ptr noalias %buckets, ptr readonly %indic ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; CHECK-NEXT: br i1 [[TMP11]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_EXIT:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll index 7325230b84e81..30562e6143cf2 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll @@ -83,9 +83,9 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]] ; DATA-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DATA: middle.block: -; DATA-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; DATA-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; DATA: scalar.ph: -; DATA-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; DATA-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; DATA-NEXT: br label [[WHILE_BODY:%.*]] ; DATA: while.body: ; DATA-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ 
[[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -136,9 +136,9 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_NO_LANEMASK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC]] ; DATA_NO_LANEMASK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DATA_NO_LANEMASK: middle.block: -; DATA_NO_LANEMASK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; DATA_NO_LANEMASK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; DATA_NO_LANEMASK: scalar.ph: -; DATA_NO_LANEMASK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; DATA_NO_LANEMASK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; DATA_NO_LANEMASK-NEXT: br label [[WHILE_BODY:%.*]] ; DATA_NO_LANEMASK: while.body: ; DATA_NO_LANEMASK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -184,9 +184,9 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_AND_CONTROL-NEXT: [[TMP15:%.*]] = extractelement <vscale x 4 x i1> [[TMP14]], i32 0 ; DATA_AND_CONTROL-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DATA_AND_CONTROL: middle.block: -; DATA_AND_CONTROL-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; DATA_AND_CONTROL-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; DATA_AND_CONTROL: scalar.ph: -; DATA_AND_CONTROL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; DATA_AND_CONTROL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; DATA_AND_CONTROL-NEXT: br label [[WHILE_BODY:%.*]] ; DATA_AND_CONTROL: while.body: ; DATA_AND_CONTROL-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -233,9 +233,9 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP16:%.*]] = extractelement <vscale x 4 x i1> [[TMP15]], i32 0 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DATA_AND_CONTROL_NO_RT_CHECK: middle.block: -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; DATA_AND_CONTROL_NO_RT_CHECK: scalar.ph: -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; DATA_AND_CONTROL_NO_RT_CHECK: while.body: ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll index fda9ef2cf6c2f..30847f381cc95 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll @@ -28,9 +28,9 @@ define void @test_stride1_4i32(ptr readonly %data, ptr noalias nocapture %dst, i ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; 
CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[END:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_023:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -222,9 +222,9 @@ define void @test_stride3_4i32(ptr readonly %data, ptr noalias nocapture %dst, i ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[END:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_023:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -286,9 +286,9 @@ define void @test_stride4_4i32(ptr readonly %data, ptr noalias nocapture %dst, i ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[END:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_023:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -351,9 +351,9 @@ define void @test_stride_loopinvar_4i32(ptr readonly %data, ptr noalias nocaptur ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[END:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_023:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-hoist-runtime-checks.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-hoist-runtime-checks.ll index 6e8bef26d3e83..9a0c62a98c13b 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-hoist-runtime-checks.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-hoist-runtime-checks.ll @@ -80,9 +80,9 @@ define void @diff_checks(ptr nocapture noundef writeonly %dst, ptr nocapture nou ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[INNER_LOOP_EXIT]], label [[SCALAR_PH]] +; CHECK-NEXT: br label 
[[INNER_LOOP_EXIT]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[OUTER_LOOP]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[OUTER_LOOP]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label [[INNER_LOOP:%.*]] ; CHECK: inner.loop: ; CHECK-NEXT: [[J_021_US:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC_US:%.*]], [[INNER_LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll index af43b747c0043..6d42dee565c87 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll @@ -22,7 +22,7 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] ; CHECK: .lr.ph: @@ -76,7 +76,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] ; CHECK: .lr.ph: @@ -129,7 +129,7 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP4]]) -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] ; CHECK: .lr.ph: @@ -181,7 +181,7 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP4]]) -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: @@ -233,7 +233,7 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP4]]) -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: @@ -285,7 +285,7 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> 
[[TMP4]]) -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: @@ -337,7 +337,7 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP4]]) -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: @@ -389,7 +389,7 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP4]]) -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll index 6f78982d7ab02..1563b4b0b26bd 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll @@ -37,10 +37,10 @@ define i32 @mla_i32(ptr noalias nocapture readonly %A, ptr noalias nocapture rea ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -120,10 +120,10 @@ define i32 @mla_i8(ptr noalias nocapture readonly %A, ptr noalias nocapture read ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 
[[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -197,10 +197,10 @@ define i32 @add_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -264,10 +264,10 @@ define i32 @mul_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP4]]) -; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -331,10 +331,10 @@ define i32 @and_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP4]]) -; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ -1, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ -1, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -398,10 +398,10 @@ define i32 @or_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP4]]) -; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 
[[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -465,10 +465,10 @@ define i32 @xor_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP4]]) -; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -532,10 +532,10 @@ define float @fadd_f32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP4]]) -; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -599,10 +599,10 @@ define float @fmul_f32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP4]]) -; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 1.000000e+00, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 
[[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll index b96a44a546a14..139308ae55c1a 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll @@ -152,10 +152,10 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) { ; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL-OUTLOOP: middle.block: ; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP10]]) -; IF-EVL-OUTLOOP-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; IF-EVL-OUTLOOP-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ; IF-EVL-OUTLOOP: scalar.ph: -; IF-EVL-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; IF-EVL-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; IF-EVL-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ] +; IF-EVL-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ] ; IF-EVL-OUTLOOP-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL-OUTLOOP: for.body: ; IF-EVL-OUTLOOP-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -208,10 +208,10 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) { ; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-INLOOP-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL-INLOOP: middle.block: -; IF-EVL-INLOOP-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; IF-EVL-INLOOP-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ; IF-EVL-INLOOP: scalar.ph: -; IF-EVL-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; IF-EVL-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP11]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; IF-EVL-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ] +; IF-EVL-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ] ; IF-EVL-INLOOP-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL-INLOOP: for.body: ; IF-EVL-INLOOP-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -387,10 +387,10 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL-OUTLOOP: middle.block: ; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> [[TMP15]]) -; IF-EVL-OUTLOOP-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-OUTLOOP-NEXT: br label [[FOR_END:%.*]] ; IF-EVL-OUTLOOP: scalar.ph: -; IF-EVL-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[ENTRY]] ] ; IF-EVL-OUTLOOP-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL-OUTLOOP: for.body: ; IF-EVL-OUTLOOP-NEXT: [[IV:%.*]] = 
phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -441,10 +441,10 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-INLOOP-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL-INLOOP: middle.block: -; IF-EVL-INLOOP-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-INLOOP-NEXT: br label [[FOR_END:%.*]] ; IF-EVL-INLOOP: scalar.ph: -; IF-EVL-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[ENTRY]] ] ; IF-EVL-INLOOP-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL-INLOOP: for.body: ; IF-EVL-INLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll index 10ac870c112ae..a50a98b48f44d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll @@ -70,9 +70,9 @@ define void @trip3_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK-NEXT: call void @llvm.masked.store.nxv2i8.p0(<vscale x 2 x i8> [[TMP13]], ptr [[TMP14]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] @@ -137,9 +137,9 @@ define void @trip5_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK-NEXT: call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP13]], ptr [[TMP14]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] @@ -204,9 +204,9 @@ define void @trip8_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK-NEXT: call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP13]], ptr [[TMP14]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ 
[[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll index 3c35c272d3389..448c85d14a380 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll @@ -35,9 +35,9 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -99,9 +99,9 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -163,10 +163,10 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> [[TMP12]]) -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -229,9 +229,9 @@ define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], 
[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -286,9 +286,9 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -379,9 +379,9 @@ define void @vector_add_trip1024(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll index 94c0036e4c8b6..8c31c378ecb3a 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll @@ -172,9 +172,9 @@ define void @truncate_to_i1_used_by_branch(i8 %x, ptr %dst) #0 { ; CHECK-NEXT: call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> zeroinitializer, <2 x ptr> [[BROADCAST_SPLAT4]], i32 1, <2 x i1> [[TMP3]]) ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 2, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: ; CHECK-NEXT: [[F_039:%.*]] = phi i8 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP_LATCH:.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll index ba7158eb02d90..b1fa95f05e125 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll @@ -38,9 +38,9 @@ define void @truncate_to_minimal_bitwidths_widen_cast_recipe(ptr %src) { ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: 
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll b/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll index c95414db18bef..cb8ce4e574c19 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll @@ -60,9 +60,9 @@ define void @type_info_cache_clobber(ptr %dstv, ptr %src, i64 %wide.trip.count) ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll index 8cba459d1e672..9dabaa1128d02 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -115,9 +115,9 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; TF-SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TF-SCALABLE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TF-SCALABLE: middle.block: -; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; TF-SCALABLE-NEXT: br label [[FOR_END:%.*]] ; TF-SCALABLE: scalar.ph: -; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]] ; TF-SCALABLE: for.body: ; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -149,9 +149,9 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028 ; TF-FIXEDLEN-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TF-FIXEDLEN: middle.block: -; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; TF-FIXEDLEN-NEXT: br label [[FOR_END:%.*]] ; TF-FIXEDLEN: scalar.ph: -; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1028, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]] ; TF-FIXEDLEN: for.body: ; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -455,9 +455,9 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr 
noalias noca ; TF-SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TF-SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; TF-SCALABLE: middle.block: -; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; TF-SCALABLE-NEXT: br label [[FOR_END:%.*]] ; TF-SCALABLE: scalar.ph: -; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]] ; TF-SCALABLE: for.body: ; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] @@ -500,9 +500,9 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028 ; TF-FIXEDLEN-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; TF-FIXEDLEN: middle.block: -; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; TF-FIXEDLEN-NEXT: br label [[FOR_END:%.*]] ; TF-FIXEDLEN: scalar.ph: -; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1028, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]] ; TF-FIXEDLEN: for.body: ; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] @@ -650,9 +650,9 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; TF-SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TF-SCALABLE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; TF-SCALABLE: middle.block: -; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; TF-SCALABLE-NEXT: br label [[FOR_END:%.*]] ; TF-SCALABLE: scalar.ph: -; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]] ; TF-SCALABLE: for.body: ; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -684,9 +684,9 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028 ; TF-FIXEDLEN-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; TF-FIXEDLEN: middle.block: -; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; TF-FIXEDLEN-NEXT: br label [[FOR_END:%.*]] ; TF-FIXEDLEN: scalar.ph: -; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1028, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]] ; TF-FIXEDLEN: for.body: ; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -821,9 +821,9 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TF-SCALABLE-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; TF-SCALABLE: middle.block: -; 
TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; TF-SCALABLE-NEXT: br label [[FOR_END:%.*]] ; TF-SCALABLE: scalar.ph: -; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]] ; TF-SCALABLE: for.body: ; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -855,9 +855,9 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028 ; TF-FIXEDLEN-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; TF-FIXEDLEN: middle.block: -; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; TF-FIXEDLEN-NEXT: br label [[FOR_END:%.*]] ; TF-FIXEDLEN: scalar.ph: -; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1028, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]] ; TF-FIXEDLEN: for.body: ; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -1017,9 +1017,9 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TF-SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; TF-SCALABLE: middle.block: -; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; TF-SCALABLE-NEXT: br label [[FOR_END:%.*]] ; TF-SCALABLE: scalar.ph: -; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]] ; TF-SCALABLE: for.body: ; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -1077,9 +1077,9 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-FIXEDLEN-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028 ; TF-FIXEDLEN-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; TF-FIXEDLEN: middle.block: -; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; TF-FIXEDLEN-NEXT: br label [[FOR_END:%.*]] ; TF-FIXEDLEN: scalar.ph: -; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1028, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]] ; TF-FIXEDLEN: for.body: ; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -1255,9 +1255,9 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TF-SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; TF-SCALABLE: middle.block: -; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; TF-SCALABLE-NEXT: br label [[FOR_END:%.*]] ; TF-SCALABLE: scalar.ph: -; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], 
[ 0, [[ENTRY:%.*]] ] +; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]] ; TF-SCALABLE: for.body: ; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] @@ -1300,9 +1300,9 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028 ; TF-FIXEDLEN-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; TF-FIXEDLEN: middle.block: -; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; TF-FIXEDLEN-NEXT: br label [[FOR_END:%.*]] ; TF-FIXEDLEN: scalar.ph: -; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1028, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]] ; TF-FIXEDLEN: for.body: ; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] @@ -1448,9 +1448,9 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TF-SCALABLE-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; TF-SCALABLE: middle.block: -; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; TF-SCALABLE-NEXT: br label [[FOR_END:%.*]] ; TF-SCALABLE: scalar.ph: -; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]] ; TF-SCALABLE: for.body: ; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -1482,9 +1482,9 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028 ; TF-FIXEDLEN-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; TF-FIXEDLEN: middle.block: -; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; TF-FIXEDLEN-NEXT: br label [[FOR_END:%.*]] ; TF-FIXEDLEN: scalar.ph: -; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1028, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]] ; TF-FIXEDLEN: for.body: ; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll index e7181f7f30c77..88a0e9cf1f146 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll @@ -52,9 +52,9 @@ define void @test_and(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label 
%[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -146,9 +146,9 @@ define void @test_or(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -240,9 +240,9 @@ define void @test_xor(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -334,9 +334,9 @@ define void @test_shl(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -428,9 +428,9 @@ define void @test_lshr(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; 
IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -522,9 +522,9 @@ define void @test_ashr(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -616,9 +616,9 @@ define void @test_add(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -710,9 +710,9 @@ define void @test_sub(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -804,9 +804,9 @@ define void @test_mul(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, 
%[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -898,9 +898,9 @@ define void @test_sdiv(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -992,9 +992,9 @@ define void @test_udiv(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -1086,9 +1086,9 @@ define void @test_srem(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -1180,9 +1180,9 @@ define void @test_urem(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ 0, 
%[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -1277,9 +1277,9 @@ define void @test_fadd(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -1372,9 +1372,9 @@ define void @test_fsub(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -1467,9 +1467,9 @@ define void @test_fmul(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -1562,9 +1562,9 @@ define void @test_fdiv(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ 
[[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -1710,9 +1710,9 @@ define void @test_fneg(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-call-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-call-intrinsics.ll index f19e581d1c028..4742f929ef3d0 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-call-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-call-intrinsics.ll @@ -65,9 +65,9 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) { ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -180,9 +180,9 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) { ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -295,9 +295,9 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) { ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 
0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -410,9 +410,9 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) { ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -517,9 +517,9 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -617,9 +617,9 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -720,9 +720,9 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -829,9 +829,9 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; 
IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -936,9 +936,9 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cast-intrinsics.ll index 7450c303c1045..e3a122dac29f8 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cast-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cast-intrinsics.ll @@ -57,9 +57,9 @@ define void @vp_sext(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -157,9 +157,9 @@ define void @vp_zext(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -257,9 +257,9 @@ define void @vp_trunc(ptr %a, ptr %b, i64 %N) { ; 
IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -357,9 +357,9 @@ define void @vp_fpext(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -457,9 +457,9 @@ define void @vp_fptrunc(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -557,9 +557,9 @@ define void @vp_sitofp(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -657,9 +657,9 @@ define void @vp_uitofp(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP39:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: 
[[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -757,9 +757,9 @@ define void @vp_fptosi(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -857,9 +857,9 @@ define void @vp_fptoui(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -957,9 +957,9 @@ define void @vp_inttoptr(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll index 082386e39f3f6..e1ad9acf3da87 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll @@ -62,10 +62,10 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL-OUTLOOP: middle.block: ; IF-EVL-OUTLOOP-NEXT: [[TMP24:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP20]]) -; IF-EVL-OUTLOOP-NEXT: br i1 true, label
[[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-OUTLOOP-NEXT: br label [[FOR_END:%.*]] ; IF-EVL-OUTLOOP: scalar.ph: -; IF-EVL-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP24]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[ENTRY]] ] ; IF-EVL-OUTLOOP-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL-OUTLOOP: for.body: ; IF-EVL-OUTLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -120,10 +120,10 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-INLOOP-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL-INLOOP: middle.block: -; IF-EVL-INLOOP-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-INLOOP-NEXT: br label [[FOR_END:%.*]] ; IF-EVL-INLOOP: scalar.ph: -; IF-EVL-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP22]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[ENTRY]] ] ; IF-EVL-INLOOP-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL-INLOOP: for.body: ; IF-EVL-INLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -314,10 +314,10 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL-OUTLOOP: middle.block: ; IF-EVL-OUTLOOP-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[PREDPHI]]) -; IF-EVL-OUTLOOP-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-OUTLOOP-NEXT: br label [[FOR_END:%.*]] ; IF-EVL-OUTLOOP: scalar.ph: -; IF-EVL-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP27]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[ENTRY]] ] ; IF-EVL-OUTLOOP-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL-OUTLOOP: for.body: ; IF-EVL-OUTLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] @@ -385,10 +385,10 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-INLOOP-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL-INLOOP: middle.block: -; IF-EVL-INLOOP-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-INLOOP-NEXT: br label [[FOR_END:%.*]] ; IF-EVL-INLOOP: scalar.ph: -; IF-EVL-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP22]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-INLOOP-NEXT:
[[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[ENTRY]] ] ; IF-EVL-INLOOP-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL-INLOOP: for.body: ; IF-EVL-INLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll index 7886867a0bcd8..6d41d8590944c 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll @@ -57,14 +57,10 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: [[TMP21:%.*]] = call i32 @llvm.vscale.i32() -; IF-EVL-NEXT: [[TMP22:%.*]] = mul i32 [[TMP21]], 4 -; IF-EVL-NEXT: [[TMP23:%.*]] = sub i32 [[TMP22]], 1 -; IF-EVL-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i32> [[VP_OP_LOAD]], i32 [[TMP23]] -; IF-EVL-NEXT: br i1 true, label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[FOR_END:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; IF-EVL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] +; IF-EVL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 33, %[[ENTRY]] ] ; IF-EVL-NEXT: br label %[[FOR_BODY:.*]] ; IF-EVL: [[FOR_BODY]]: ; IF-EVL-NEXT: [[INDVARS:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_NEXT:%.*]], %[[FOR_BODY]] ] @@ -210,24 +206,8 @@ define void @second_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: [[TMP25:%.*]] = call i32 @llvm.vscale.i32() -; IF-EVL-NEXT: [[TMP26:%.*]] = mul i32 [[TMP25]], 4 -; IF-EVL-NEXT: [[TMP27:%.*]] = sub i32 [[TMP26]], 1 -; IF-EVL-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i32> [[VP_OP_LOAD]], i32 [[TMP27]] -; IF-EVL-NEXT: [[TMP28:%.*]] = call i32 @llvm.vscale.i32() -; IF-EVL-NEXT: [[TMP29:%.*]] = mul i32 [[TMP28]], 4 -; IF-EVL-NEXT: [[TMP30:%.*]] = sub i32 [[TMP29]], 1 -; IF-EVL-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <vscale x 4 x i32> [[TMP19]], i32 [[TMP30]] -; IF-EVL-NEXT: br i1 true, label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[FOR_END:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; IF-EVL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ] -; IF-EVL-NEXT: [[SCALAR_RECUR_INIT4:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT3]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ] -; IF-EVL-NEXT: br label %[[FOR_BODY:.*]] -; IF-EVL: [[FOR_BODY]]: -; IF-EVL-NEXT: [[INDVARS:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_NEXT:%.*]], %[[FOR_BODY]] ] -; IF-EVL-NEXT: [[FOR1:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[TMP31:%.*]],
%[[FOR_BODY]] ] -; IF-EVL-NEXT: [[FOR2:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT4]], %[[SCALAR_PH]] ], [ [[FOR1]], %[[FOR_BODY]] ] ; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDVARS]] ; IF-EVL-NEXT: [[TMP31]] = load i32, ptr [[ARRAYIDX]], align 4 ; IF-EVL-NEXT: [[ADD:%.*]] = add nsw i32 [[FOR1]], [[FOR2]] @@ -389,24 +369,8 @@ define void @third_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: [[TMP29:%.*]] = call i32 @llvm.vscale.i32() -; IF-EVL-NEXT: [[TMP30:%.*]] = mul i32 [[TMP29]], 4 -; IF-EVL-NEXT: [[TMP31:%.*]] = sub i32 [[TMP30]], 1 -; IF-EVL-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i32> [[VP_OP_LOAD]], i32 [[TMP31]] -; IF-EVL-NEXT: [[TMP32:%.*]] = call i32 @llvm.vscale.i32() -; IF-EVL-NEXT: [[TMP33:%.*]] = mul i32 [[TMP32]], 4 -; IF-EVL-NEXT: [[TMP34:%.*]] = sub i32 [[TMP33]], 1 -; IF-EVL-NEXT: [[VECTOR_RECUR_EXTRACT6:%.*]] = extractelement <vscale x 4 x i32> [[TMP22]], i32 [[TMP34]] -; IF-EVL-NEXT: [[TMP35:%.*]] = call i32 @llvm.vscale.i32() -; IF-EVL-NEXT: [[TMP36:%.*]] = mul i32 [[TMP35]], 4 -; IF-EVL-NEXT: [[TMP37:%.*]] = sub i32 [[TMP36]], 1 -; IF-EVL-NEXT: [[VECTOR_RECUR_EXTRACT7:%.*]] = extractelement <vscale x 4 x i32> [[TMP23]], i32 [[TMP37]] -; IF-EVL-NEXT: br i1 true, label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[FOR_END:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; IF-EVL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ] -; IF-EVL-NEXT: [[SCALAR_RECUR_INIT7:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT6]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ] -; IF-EVL-NEXT: [[SCALAR_RECUR_INIT8:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT7]], %[[MIDDLE_BLOCK]] ], [ 11, %[[ENTRY]] ] ; IF-EVL-NEXT: br label %[[FOR_BODY:.*]] ; IF-EVL: [[FOR_BODY]]: ; IF-EVL-NEXT: [[INDVARS:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_NEXT:%.*]], %[[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-inloop-reduction.ll index 6e7f634ce9459..92b451237a75f 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-inloop-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-inloop-reduction.ll @@ -47,10 +47,10 @@ define i32 @add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP15]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[ENTRY]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]],
[[FOR_BODY]] ] @@ -159,10 +159,10 @@ define i32 @mul(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP7:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[MUL]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY1]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY1:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[ENTRY1]] ] ; IF-EVL-NEXT: br label [[FOR_BODY1:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ] @@ -276,10 +276,10 @@ define i32 @or(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP15]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[ENTRY]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -393,10 +393,10 @@ define i32 @and(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP15]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[ENTRY]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -510,10 +510,10 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP15]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[ENTRY]] ] ; IF-EVL-NEXT: br 
label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -627,10 +627,10 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[ENTRY]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -747,10 +747,10 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[ENTRY]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -867,10 +867,10 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[ENTRY]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -987,10 +987,10 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ], [ [[START]], 
[[ENTRY]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[ENTRY]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -1107,10 +1107,10 @@ define float @fadd(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP15]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[START]], [[ENTRY]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -1219,10 +1219,10 @@ define float @fmul(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[TMP7:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[MUL]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY1]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY1:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[START]], [[ENTRY1]] ] ; IF-EVL-NEXT: br label [[FOR_BODY1:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ] @@ -1337,10 +1337,10 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[START]], [[ENTRY]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -1459,10 +1459,10 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: 
scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[START]], [[ENTRY]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -1576,10 +1576,10 @@ define float @fminimum(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> [[TMP5]]) -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY1]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY1:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[START]], [[ENTRY1]] ] ; IF-EVL-NEXT: br label [[FOR_BODY1:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ] @@ -1690,10 +1690,10 @@ define float @fmaximum(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> [[TMP5]]) -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY1]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY1:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[START]], [[ENTRY1]] ] ; IF-EVL-NEXT: br label [[FOR_BODY1:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ] @@ -1812,10 +1812,10 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { ; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[START]], [[ENTRY]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -1943,10 +1943,10 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: [[TMP19:%.*]] = call 
i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP16]]) ; IF-EVL-NEXT: [[TMP20:%.*]] = freeze i1 [[TMP19]] ; IF-EVL-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP20]], i32 [[INV:%.*]], i32 [[START:%.*]] -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[ENTRY]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -2070,10 +2070,10 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP16]]) ; IF-EVL-NEXT: [[TMP20:%.*]] = freeze i1 [[TMP19]] ; IF-EVL-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP20]], i32 [[INV:%.*]], i32 [[START:%.*]] -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[ENTRY]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-intermediate-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-intermediate-store.ll index 69988e03b2657..945a95710862d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-intermediate-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-intermediate-store.ll @@ -69,10 +69,10 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr) ; IF-EVL-OUTLOOP: middle.block: ; IF-EVL-OUTLOOP-NEXT: [[TMP23:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP19]]) ; IF-EVL-OUTLOOP-NEXT: store i32 [[TMP23]], ptr [[ADDR]], align 4, !alias.scope [[META6:![0-9]+]], !noalias [[META0]] -; IF-EVL-OUTLOOP-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-OUTLOOP-NEXT: br label [[FOR_END:%.*]] ; IF-EVL-OUTLOOP: scalar.ph: -; IF-EVL-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; IF-EVL-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP23]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY1]] ], [ [[START]], [[VECTOR_MEMCHECK]] ] +; IF-EVL-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY1:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; IF-EVL-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[ENTRY1]] ], [ [[START]], [[VECTOR_MEMCHECK]] ] ; IF-EVL-OUTLOOP-NEXT: br label [[FOR_BODY1:%.*]] ; IF-EVL-OUTLOOP: for.body: ; IF-EVL-OUTLOOP-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ] @@ -132,10 +132,10 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr
%addr) ; IF-EVL-INLOOP-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; IF-EVL-INLOOP: middle.block: ; IF-EVL-INLOOP-NEXT: store i32 [[TMP22]], ptr [[ADDR]], align 4, !alias.scope [[META6:![0-9]+]], !noalias [[META0]] -; IF-EVL-INLOOP-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-INLOOP-NEXT: br label [[FOR_END:%.*]] ; IF-EVL-INLOOP: scalar.ph: -; IF-EVL-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; IF-EVL-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP22]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ], [ [[START]], [[VECTOR_MEMCHECK]] ] +; IF-EVL-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; IF-EVL-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[ENTRY]] ], [ [[START]], [[VECTOR_MEMCHECK]] ] ; IF-EVL-INLOOP-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL-INLOOP: for.body: ; IF-EVL-INLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-known-no-overflow.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-known-no-overflow.ll index b4ef2c800f5bd..096ca35ecfe06 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-known-no-overflow.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-known-no-overflow.ll @@ -47,9 +47,9 @@ define void @trip_count_max_1024(ptr %p, i64 %tc) vscale_range(2, 1024) { ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br i1 true, label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; CHECK-NEXT: br label %[[EXIT_LOOPEXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -123,9 +123,9 @@ define void @overflow_at_0(ptr %p, i64 %tc) vscale_range(2, 1024) { ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br i1 true, label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; CHECK-NEXT: br label %[[EXIT_LOOPEXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -195,9 +195,9 @@ define void @no_overflow_at_0(ptr %p, i64 %tc) vscale_range(2, 1024) { ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br i1 true, label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; CHECK-NEXT: br label %[[EXIT_LOOPEXIT:.*]] ; 
CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll index ed44fe33999ad..88cd1bca04e76 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll @@ -47,10 +47,10 @@ define i32 @add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP14]]) -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP17]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[ENTRY]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -161,10 +161,10 @@ define i32 @mul(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP6]]) -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY1]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY1:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[ENTRY1]] ] ; IF-EVL-NEXT: br label [[FOR_BODY1:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ] @@ -280,10 +280,10 @@ define i32 @or(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> [[TMP14]]) -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP17]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[ENTRY]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [
[[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -400,10 +400,10 @@ define i32 @and(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> [[TMP14]]) -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP17]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[ENTRY]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -520,10 +520,10 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> [[TMP14]]) -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP17]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[ENTRY]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -642,10 +642,10 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> [[TMP15]]) -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[ENTRY]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -769,10 +769,10 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> [[TMP15]]) -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +;
IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[ENTRY]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -896,10 +896,10 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> [[TMP15]]) -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[ENTRY]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -1023,10 +1023,10 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> [[TMP15]]) -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[ENTRY]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -1148,10 +1148,10 @@ define float @fadd(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP17:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP14]]) -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP17]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[START]], [[ENTRY]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -1262,10 +1262,10 @@ define float @fmul(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP8:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 1.000000e+00, <8 x float> [[TMP6]]) -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL:
scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY1]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY1:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[START]], [[ENTRY1]] ] ; IF-EVL-NEXT: br label [[FOR_BODY1:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ] @@ -1383,10 +1383,10 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> [[TMP15]]) -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[START]], [[ENTRY]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -1510,10 +1510,10 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> [[TMP15]]) -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[START]], [[ENTRY]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -1629,10 +1629,10 @@ define float @fminimum(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> [[TMP5]]) -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY1]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY1:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[START]], [[ENTRY1]] ] ; IF-EVL-NEXT: br label [[FOR_BODY1:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ] @@ -1743,10 +1743,10 @@ define float @fmaximum(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: br i1 [[TMP6]], label
[[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> [[TMP5]]) -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY1]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY1:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[START]], [[ENTRY1]] ] ; IF-EVL-NEXT: br label [[FOR_BODY1:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ] @@ -1866,10 +1866,10 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { ; IF-EVL-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP20:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP17]]) -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP20]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[START]], [[ENTRY]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -1997,10 +1997,10 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP15]]) ; IF-EVL-NEXT: [[TMP19:%.*]] = freeze i1 [[TMP18]] ; IF-EVL-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP19]], i32 [[INV:%.*]], i32 [[START:%.*]] -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[ENTRY]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -2124,10 +2124,10 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP15]]) ; IF-EVL-NEXT: [[TMP19:%.*]] = freeze i1 [[TMP18]] ; IF-EVL-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP19]], i32 [[INV:%.*]], i32 [[START:%.*]] -; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[FOR_END:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT:
[[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[ENTRY]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll index 5b579b0749c67..91924e4ba5945 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll @@ -22,15 +22,13 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt ; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 -; IF-EVL-NEXT: [[IND_END:%.*]] = sub i64 [[STARTVAL:%.*]], [[N_VEC]] -; IF-EVL-NEXT: [[IND_END1:%.*]] = trunc i64 [[N_VEC]] to i32 ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) -; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL:%.*]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0 ; IF-EVL-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], -1 ; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP7]] @@ -55,10 +53,10 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt ; IF-EVL-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br i1 true, label [[LOOPEND:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[LOOPEND:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[STARTVAL]], [[ENTRY:%.*]] ] -; IF-EVL-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[STARTVAL]], [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[ADD_PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] @@ -123,15 +121,13 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal ; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 -; IF-EVL-NEXT: [[IND_END:%.*]] = sub i64 [[STARTVAL:%.*]], [[N_VEC]] -; IF-EVL-NEXT: [[IND_END1:%.*]] = trunc i64 [[N_VEC]] to i32 ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: 
[[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) -; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL:%.*]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0 ; IF-EVL-NEXT: [[OFFSET_IDX3:%.*]] = trunc i64 [[EVL_BASED_IV]] to i32 ; IF-EVL-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX3]], 0 @@ -171,10 +167,10 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal ; IF-EVL-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br i1 true, label [[LOOPEND:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[LOOPEND:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[STARTVAL]], [[ENTRY:%.*]] ] -; IF-EVL-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[STARTVAL]], [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.body: ; IF-EVL-NEXT: [[ADD_PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_INC:%.*]] ] @@ -265,7 +261,6 @@ define void @multiple_reverse_vector_pointer(ptr noalias %a, ptr noalias %b, ptr ; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 16 -; IF-EVL-NEXT: [[TMP5:%.*]] = sub i64 1024, [[N_VEC]] ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -306,9 +301,9 @@ define void @multiple_reverse_vector_pointer(ptr noalias %a, ptr noalias %b, ptr ; IF-EVL-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[EXIT:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ 1024, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[ENTRY:%.*]] ] ; IF-EVL-NEXT: br label [[LOOP:%.*]] ; IF-EVL: loop: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-safe-dep-distance.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-safe-dep-distance.ll index 313511d017f65..29f743a14cd87 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-safe-dep-distance.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-safe-dep-distance.ll @@ -45,9 +45,9 @@ define void @test(ptr %p) { ; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[EXIT:%.*]] ; IF-EVL: scalar.ph: -; 
IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; IF-EVL-NEXT: br label [[LOOP:%.*]] ; IF-EVL: loop: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -365,9 +365,9 @@ define void @trivial_due_max_vscale(ptr %p) { ; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[EXIT:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; IF-EVL-NEXT: br label [[LOOP:%.*]] ; IF-EVL: loop: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -450,9 +450,9 @@ define void @no_high_lmul_or_interleave(ptr %p) { ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; IF-EVL-NEXT: br label [[EXIT:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; IF-EVL-NEXT: br label [[LOOP:%.*]] ; IF-EVL: loop: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll index 0829bab26f062..858d1da70e783 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll @@ -27,7 +27,6 @@ define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 -; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[SPEC_SELECT]], [[N_VEC]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -51,9 +50,9 @@ define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) { ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[SPEC_SELECT]], %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] diff --git 
a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll index 3594b3f047363..30934e4c072c9 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll @@ -62,19 +62,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: middle.block: ; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = compute-reduction-result ir<[[RDX_PHI]]>, vp<[[RDX_SELECT]]> ; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[RDX_EX:%.+]]> = extract-from-end vp<[[RDX]]>, ir<1> -; IF-EVL-OUTLOOP-NEXT: EMIT branch-on-cond ir<true> -; IF-EVL-OUTLOOP-NEXT: Successor(s): ir-bb<for.end>, scalar.ph -; IF-EVL-OUTLOOP-EMPTY: -; IF-EVL-OUTLOOP-NEXT: scalar.ph: -; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[IV_RESUME:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> -; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start> -; IF-EVL-OUTLOOP-NEXT: Successor(s): ir-bb<for.body> -; IF-EVL-OUTLOOP-EMPTY: -; IF-EVL-OUTLOOP-NEXT: ir-bb<for.body>: -; IF-EVL-OUTLOOP-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] (extra operand: vp<[[IV_RESUME]]> from scalar.ph) -; IF-EVL-OUTLOOP-NEXT: IR %rdx = phi i32 [ %start, %entry ], [ %add, %for.body ] -; IF-EVL-OUTLOOP: IR %exitcond.not = icmp eq i64 %iv.next, %n -; IF-EVL-OUTLOOP-NEXT: No successors +; IF-EVL-OUTLOOP-NEXT: Successor(s): ir-bb<for.end> ; IF-EVL-OUTLOOP-EMPTY: ; IF-EVL-OUTLOOP-NEXT: ir-bb<for.end>: ; IF-EVL-OUTLOOP-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RDX_EX]]> from middle.block) @@ -113,19 +101,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: middle.block: ; IF-EVL-INLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = compute-reduction-result ir<[[RDX_PHI]]>, ir<[[ADD]]> ; IF-EVL-INLOOP-NEXT: EMIT vp<[[RDX_EX:%.+]]> = extract-from-end vp<[[RDX]]>, ir<1> -; IF-EVL-INLOOP-NEXT: EMIT branch-on-cond ir<true> -; IF-EVL-INLOOP-NEXT: Successor(s): ir-bb<for.end>, scalar.ph -; IF-EVL-INLOOP-EMPTY: -; IF-EVL-INLOOP-NEXT: scalar.ph: -; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV_RESUME:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> -; IF-EVL-INLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start> -; IF-EVL-INLOOP-NEXT: Successor(s): ir-bb<for.body> -; IF-EVL-INLOOP-EMPTY: -; IF-EVL-INLOOP-NEXT: ir-bb<for.body>: -; IF-EVL-INLOOP-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] (extra operand: vp<[[IV_RESUME]]> from scalar.ph) -; IF-EVL-INLOOP-NEXT: IR %rdx = phi i32 [ %start, %entry ], [ %add, %for.body ] -; IF-EVL-INLOOP: IR %exitcond.not = icmp eq i64 %iv.next, %n -; IF-EVL-INLOOP-NEXT: No successors +; IF-EVL-INLOOP-NEXT: Successor(s): ir-bb<for.end> ; IF-EVL-INLOOP-EMPTY: ; IF-EVL-INLOOP-NEXT: ir-bb<for.end>: ; IF-EVL-INLOOP-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RDX_EX]]> from middle.block) diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll index 7b0fa644ea001..ba99f51664986 100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll @@ -63,11 +63,10 @@ define void @func_21() { ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 6 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32>
[[TMP12]], i32 1 -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 6, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LV:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll index 1f171414242bc..dcf410b87cec9 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll @@ -1056,10 +1056,10 @@ define i64 @live_in_known_1_via_scev() { ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> [[TMP1]]) -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 8, [[MIDDLE_BLOCK]] ], [ 0, [[PH]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 3, [[PH]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[PH]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 3, [[PH]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll index 2db58fe1caa7f..dcb089bce74ae 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll @@ -162,7 +162,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll index 3d23090dd1235..0fb1e2cec2b19 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll @@ -42,10 +42,10 @@ define i1 @fn(ptr %nno) #0 { ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP12]]) -; CHECK-NEXT: br i1 true, label [[FOR_END36:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: 
[[BC_RESUME_VAL:%.*]] = phi i64 [ -2, [[MIDDLE_BLOCK]] ], [ 10, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_BODY20:%.*]] ; CHECK: loop.header: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC35:%.*]] ] @@ -64,7 +64,7 @@ define i1 @fn(ptr %nno) #0 { ; CHECK-NEXT: [[SUM_1]] = or i32 [[REM27_PN]], [[SUM_01]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 ; CHECK-NEXT: [[CMP19_NOT:%.*]] = icmp eq i64 [[INDVARS_IV]], 0 -; CHECK-NEXT: br i1 [[CMP19_NOT]], label [[FOR_END36]], label [[FOR_BODY20]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP19_NOT]], label [[EXIT]], label [[FOR_BODY20]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi i32 [ [[SUM_1]], [[FOR_INC35]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: [[CMP41:%.*]] = icmp eq i32 [[SUM_1_LCSSA]], 0 diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll index 6dc8fec23575a..eaa59af6cefc5 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll @@ -639,9 +639,9 @@ define void @wide_iv_trunc(ptr %dst, i64 %N) { ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[EXIT_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll index 9e87cc29be4e8..6a51756392a69 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll @@ -32,9 +32,9 @@ define i32 @foo_optsize() #0 { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] @@ -71,9 +71,9 @@ define i32 @foo_optsize() #0 { ; AUTOVF-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 224 ; AUTOVF-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; AUTOVF: middle.block: -; AUTOVF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; AUTOVF-NEXT: br label [[FOR_END:%.*]] ; AUTOVF: scalar.ph: -; AUTOVF-NEXT: 
[[BC_RESUME_VAL:%.*]] = phi i32 [ 224, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; AUTOVF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] ; AUTOVF-NEXT: br label [[FOR_BODY:%.*]] ; AUTOVF: for.body: ; AUTOVF-NEXT: [[I_08:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] @@ -132,9 +132,9 @@ define i32 @foo_minsize() #1 { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] @@ -171,9 +171,9 @@ define i32 @foo_minsize() #1 { ; AUTOVF-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 224 ; AUTOVF-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; AUTOVF: middle.block: -; AUTOVF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; AUTOVF-NEXT: br label [[FOR_END:%.*]] ; AUTOVF: scalar.ph: -; AUTOVF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 224, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; AUTOVF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] ; AUTOVF-NEXT: br label [[FOR_BODY:%.*]] ; AUTOVF: for.body: ; AUTOVF-NEXT: [[I_08:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] @@ -369,8 +369,6 @@ define void @tail_folded_store_avx512(ptr %start, ptr %end) #3 { ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 64 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP3]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[N_VEC]], -72 -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i32 [[TMP4]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <64 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <64 x i32> [[BROADCAST_SPLATINSERT3]], <64 x i32> poison, <64 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -388,9 +386,9 @@ define void @tail_folded_store_avx512(ptr %start, ptr %end) #3 { ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[START]], [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ] @@ -415,8 +413,6 @@ define void @tail_folded_store_avx512(ptr %start, ptr %end) #3 { ; AUTOVF-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 8 ; AUTOVF-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] ; AUTOVF-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP3]], 1 -; AUTOVF-NEXT: [[TMP4:%.*]] = mul i32 [[N_VEC]], -72 -; AUTOVF-NEXT: [[IND_END:%.*]] = 
getelementptr i8, ptr [[START]], i32 [[TMP4]] ; AUTOVF-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0 ; AUTOVF-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT3]], <8 x i32> poison, <8 x i32> zeroinitializer ; AUTOVF-NEXT: br label [[VECTOR_BODY:%.*]] @@ -434,9 +430,9 @@ define void @tail_folded_store_avx512(ptr %start, ptr %end) #3 { ; AUTOVF-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; AUTOVF-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; AUTOVF: middle.block: -; AUTOVF-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; AUTOVF-NEXT: br label [[EXIT:%.*]] ; AUTOVF: scalar.ph: -; AUTOVF-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY:%.*]] ] +; AUTOVF-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[START]], [[ENTRY:%.*]] ] ; AUTOVF-NEXT: br label [[LOOP:%.*]] ; AUTOVF: loop: ; AUTOVF-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll index a190e94a01489..88fc0ba5a7060 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll @@ -40,15 +40,15 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 { ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 12 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[BB6:%.*]], label [[SCALAR_PH]], !prof [[PROF5:![0-9]+]] +; CHECK-NEXT: br label [[BB6:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 87, [[MIDDLE_BLOCK]] ], [ 99, [[BB5:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 99, [[BB5:%.*]] ] ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] ; CHECK-NEXT: [[AND:%.*]] = and i64 [[IV]], 1 ; CHECK-NEXT: [[ICMP17:%.*]] = icmp eq i64 [[AND]], 0 -; CHECK-NEXT: br i1 [[ICMP17]], label [[BB18:%.*]], label [[LOOP_LATCH]], !prof [[PROF6:![0-9]+]] +; CHECK-NEXT: br i1 [[ICMP17]], label [[BB18:%.*]], label [[LOOP_LATCH]], !prof [[PROF5:![0-9]+]] ; CHECK: bb18: ; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[IV]], 1 ; CHECK-NEXT: [[GETELEMENTPTR19:%.*]] = getelementptr inbounds i64, ptr [[ARR]], i64 [[OR]] @@ -57,7 +57,7 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 { ; CHECK: loop.latch: ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 ; CHECK-NEXT: [[ICMP22:%.*]] = icmp eq i64 [[IV_NEXT]], 90 -; CHECK-NEXT: br i1 [[ICMP22]], label [[BB6]], label [[LOOP_HEADER]], !prof [[PROF7:![0-9]+]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[ICMP22]], label [[BB6]], label [[LOOP_HEADER]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: bb6: ; CHECK-NEXT: ret void ; @@ -96,13 +96,12 @@ attributes #0 = {"target-cpu"="haswell" "target-features"="+avx2" } ;. 
-; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 127} +; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 3} ; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 23} ; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]], [[META4:![0-9]+]]} ; CHECK: [[META3]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[PROF5]] = !{!"branch_weights", i32 1, i32 3} -; CHECK: [[PROF6]] = !{!"branch_weights", i32 1, i32 1} -; CHECK: [[PROF7]] = !{!"branch_weights", i32 0, i32 0} -; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]], [[META3]]} +; CHECK: [[PROF5]] = !{!"branch_weights", i32 1, i32 1} +; CHECK: [[PROF6]] = !{!"branch_weights", i32 0, i32 0} +; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]], [[META3]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/X86/scev-checks-unprofitable.ll b/llvm/test/Transforms/LoopVectorize/X86/scev-checks-unprofitable.ll index 868e07aa0490e..d90c8d75ed671 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/scev-checks-unprofitable.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/scev-checks-unprofitable.ll @@ -37,9 +37,9 @@ define void @value_defined_in_loop1_used_for_trip_counts(i32 %start, i1 %c, ptr ; CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> zeroinitializer, ptr [[TMP2]], i32 1, <16 x i1> [[TMP0]]) ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br i1 true, label %[[EXIT_1_LOOPEXIT1:.*]], label %[[SCALAR_PH]] +; CHECK-NEXT: br label %[[EXIT_1_LOOPEXIT1:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_3_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[LOOP_3_PREHEADER]] ] ; CHECK-NEXT: br label %[[LOOP_3:.*]] ; CHECK: [[LOOP_2_PREHEADER]]: ; CHECK-NEXT: br label %[[LOOP_2:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll index 8914edf28372f..f1a7b65e6b30d 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll @@ -122,7 +122,7 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[DOT_PREHEADER_CRIT_EDGE:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[DOT_PREHEADER_CRIT_EDGE:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH5:%.*]] ; CHECK: ..preheader_crit_edge: @@ -204,13 +204,13 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK-NEXT: [[TMP50:%.*]] = icmp eq i64 [[INDEX_NEXT29]], [[N_VEC12]] ; CHECK-NEXT: br i1 [[TMP50]], label [[MIDDLE_BLOCK28:%.*]], label [[VECTOR_BODY13]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block28: -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH8]] -; CHECK: scalar.ph7: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] +; CHECK: scalar.ph7: +; CHECK-NEXT: br label [[DOTLR_PH1:%.*]] ; CHECK: .lr.ph5: ; CHECK-NEXT: br i1 poison, label [[DOT_PREHEADER_CRIT_EDGE]], label [[DOTLR_PH5]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 poison, label [[DOTLR_PH]], label [[DOTLR_PH1]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: ._crit_edge.loopexit: ; CHECK-NEXT: br 
label [[DOT_CRIT_EDGE]] ; CHECK: ._crit_edge: @@ -326,7 +326,7 @@ define void @example3(i32 %n, ptr noalias nocapture %p, ptr noalias nocapture %q ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] ; CHECK: .lr.ph: @@ -512,7 +512,7 @@ define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst) ; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[TMP26:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[TMP26:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[TMP25:%.*]] ; CHECK: 25: diff --git a/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll b/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll index 428ed94dadb89..92a0befee939a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll @@ -32,9 +32,9 @@ define dso_local void @tail_folding_enabled(ptr noalias nocapture %A, ptr noalia ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 432 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 432, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void @@ -100,9 +100,9 @@ define dso_local void @tail_folding_disabled(ptr noalias nocapture %A, ptr noali ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 432 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 432, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void @@ -186,10 +186,10 @@ define i32 @reduction_i32(ptr nocapture readonly %A, ptr nocapture readonly %B, ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP11]]) -; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; CHECK-NEXT: 
br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index 0b2e7fe484390..33b449b602d72 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -76,20 +76,7 @@ define void @sink_replicate_region_1(i32 %x, ptr %ptr, ptr noalias %dst) optsize ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%conv>, ir<1> -; CHECK-NEXT: EMIT branch-on-cond ir<true> -; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph -; CHECK-EMPTY: -; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0> -; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.*]]> = resume-phi vp<[[VEC_TC]]>, ir<0> -; CHECK-NEXT: Successor(s): ir-bb<loop> -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb<loop>: -; CHECK-NEXT: IR %0 = phi i32 [ 0, %entry ], [ %conv, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) -; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] (extra operand: vp<[[RESUME_IV]]> from scalar.ph) -; CHECK: IR %ec = icmp eq i32 %iv.next, 20001 -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): ir-bb<exit> ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb<exit> ; CHECK-NEXT: No successors @@ -167,20 +154,7 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%recur.next>, ir<1> -; CHECK-NEXT: EMIT branch-on-cond ir<true> -; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph -; CHECK-EMPTY: -; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0> -; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.*]]> = resume-phi vp<[[VEC_TC]]>, ir<0> -; CHECK-NEXT: Successor(s): ir-bb<loop> -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb<loop>: -; CHECK-NEXT: IR %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) -; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] (extra operand: vp<[[RESUME_IV]]> from scalar.ph) -; CHECK: IR %ec = icmp eq i32 %iv.next, 20001 -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): ir-bb<exit> ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb<exit> ; CHECK-NEXT: No successors @@ -240,23 +214,8 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: ; CHECK-NEXT: EMIT vp<[[RED_RES:%.+]]> = compute-reduction-result ir<%and.red>, vp<[[SEL]]> -; CHECK-NEXT: EMIT vp<[[RED_EX:%.+]]> = extract-from-end vp<[[RED_RES]]>, ir<1> -; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%recur.next>, ir<1> -; CHECK-NEXT: EMIT branch-on-cond ir<true> -; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph -; CHECK-EMPTY: -; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0> -; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.*]]> = resume-phi vp<[[VEC_TC]]>, ir<0> -; CHECK-NEXT: EMIT vp<[[RESUME_RED:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<1234> -; CHECK-NEXT: Successor(s): ir-bb<loop> -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb<loop>: -; CHECK-NEXT: IR %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] (extra operand:
vp<[[RESUME_1_P]]> from scalar.ph) -; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] (extra operand: vp<[[RESUME_IV]]> from scalar.ph) -; CHECK-NEXT: IR %and.red = phi i32 [ 1234, %entry ], [ %and.red.next, %loop ] -; CHECK: IR %ec = icmp eq i32 %iv.next, 20001 -; CHECK-NEXT: No successors +; CHECK-NEXT: EMIT vp<[[RED_EX:%.+]]> = extract-from-end vp<[[RED_RES]]>, ir<1> +; CHECK-NEXT: Successor(s): ir-bb<exit> ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb<exit> ; CHECK-NEXT: IR %res = phi i32 [ %and.red.next, %loop ] (extra operand: vp<[[RED_EX]]> from middle.block) @@ -358,20 +317,7 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%conv>, ir<1> -; CHECK-NEXT: EMIT branch-on-cond ir<true> -; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph -; CHECK-EMPTY: -; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0> -; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.*]]> = resume-phi vp<[[VEC_TC]]>, ir<0> -; CHECK-NEXT: Successor(s): ir-bb<loop> -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb<loop>: -; CHECK-NEXT: IR %0 = phi i32 [ 0, %entry ], [ %conv, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) -; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] (extra operand: vp<[[RESUME_IV]]> from scalar.ph) -; CHECK: IR %ec = icmp eq i32 %iv.next, 20001 -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): ir-bb<exit> ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb<exit> ; CHECK-NEXT: No successors @@ -456,20 +402,7 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%recur.next>, ir<1> -; CHECK-NEXT: EMIT branch-on-cond ir<true> -; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph -; CHECK-EMPTY: -; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0> -; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.*]]> = resume-phi vp<[[VEC_TC]]>, ir<0> -; CHECK-NEXT: Successor(s): ir-bb<loop> -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb<loop>: -; CHECK-NEXT: IR %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) -; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] (extra operand: vp<[[RESUME_IV]]> from scalar.ph) -; CHECK: IR %C = icmp sgt i32 %iv.next, %recur.next -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): ir-bb<exit> ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb<exit> ; CHECK-NEXT: No successors @@ -508,7 +441,6 @@ define void @need_new_block_after_sinking_pr56146(i32 %x, ptr %src, ptr noalias ; CHECK-NEXT: Successor(s): vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: -; CHECK-NEXT: vp<[[END:%.+]]> = DERIVED-IV ir<2> + vp<[[VEC_TC]]> * ir<1> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: <x1> vector loop: { @@ -547,20 +479,7 @@ define void @need_new_block_after_sinking_pr56146(i32 %x, ptr %src, ptr noalias ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%l>, ir<1> -; CHECK-NEXT: EMIT branch-on-cond ir<true> -; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph -; CHECK-EMPTY: -; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.*]]> = resume-phi vp<[[END]]>, ir<2> -; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0> -; CHECK-NEXT: Successor(s): ir-bb<loop> -;
CHECK-EMPTY: -; CHECK-NEXT: ir-bb<loop>: -; CHECK-NEXT: IR %iv = phi i64 [ 2, %entry ], [ %iv.next, %loop ] (extra operand: vp<[[RESUME_IV]]> from scalar.ph) -; CHECK-NEXT: IR %.pn = phi i32 [ 0, %entry ], [ %l, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) -; CHECK: IR %ec = icmp ugt i64 %iv, 3 -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): ir-bb<exit> ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb<exit> ; CHECK-NEXT: No successors diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index 764e6fe1fc333..da7ca066f113c 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -2686,7 +2686,6 @@ define i32 @sink_into_replication_region(i32 %y) { ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 8 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] ; UNROLL-NO-IC-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1 -; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT5]], <4 x i32> poison, <4 x i32> zeroinitializer ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] @@ -2786,12 +2785,11 @@ define i32 @sink_into_replication_region(i32 %y) { ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP49]], [[TMP48]] ; UNROLL-NO-IC-NEXT: [[TMP51:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP43]], i32 3 -; UNROLL-NO-IC-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]], !prof [[PROF29:![0-9]+]] +; UNROLL-NO-IC-NEXT: br label [[BB1:%.*]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB:%.*]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] -; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP51]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[Y]], [[BB:%.*]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB]] ] +; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ] ; UNROLL-NO-IC-NEXT: br label [[BB2:%.*]] ; UNROLL-NO-IC: bb1: ; UNROLL-NO-IC-NEXT: [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[TMP51]], [[MIDDLE_BLOCK]] ] @@ -2804,7 +2802,7 @@ define i32 @sink_into_replication_region(i32 %y) { ; UNROLL-NO-IC-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]] ; UNROLL-NO-IC-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1 ; UNROLL-NO-IC-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2 -; UNROLL-NO-IC-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF30:![0-9]+]], !llvm.loop [[LOOP31:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF29:![0-9]+]], !llvm.loop [[LOOP30:![0-9]+]] ; ; UNROLL-NO-VF-LABEL: @sink_into_replication_region( ; UNROLL-NO-VF-NEXT: bb: @@ -2817,7 +2815,6 @@ define i32 @sink_into_replication_region(i32 %y) { ; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2 ; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] ; UNROLL-NO-VF-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1 -; UNROLL-NO-VF-NEXT: [[IND_END:%.*]]
= sub i32 [[Y]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-VF: vector.body: ; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE4:%.*]] ] @@ -2852,11 +2849,11 @@ define i32 @sink_into_replication_region(i32 %y) { ; UNROLL-NO-VF-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF27:![0-9]+]], !llvm.loop [[LOOP28:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP13]], [[TMP12]] -; UNROLL-NO-VF-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]], !prof [[PROF29:![0-9]+]] +; UNROLL-NO-VF-NEXT: br label [[BB1:%.*]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB:%.*]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] -; UNROLL-NO-VF-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[Y]], [[BB:%.*]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB]] ] +; UNROLL-NO-VF-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ] ; UNROLL-NO-VF-NEXT: br label [[BB2:%.*]] ; UNROLL-NO-VF: bb1: ; UNROLL-NO-VF-NEXT: [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ] @@ -2869,7 +2866,7 @@ define i32 @sink_into_replication_region(i32 %y) { ; UNROLL-NO-VF-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]] ; UNROLL-NO-VF-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1 ; UNROLL-NO-VF-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2 -; UNROLL-NO-VF-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF30:![0-9]+]], !llvm.loop [[LOOP31:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF29:![0-9]+]], !llvm.loop [[LOOP30:![0-9]+]] ; ; SINK-AFTER-LABEL: @sink_into_replication_region( ; SINK-AFTER-NEXT: bb: @@ -2882,7 +2879,6 @@ define i32 @sink_into_replication_region(i32 %y) { ; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 4 ; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] ; SINK-AFTER-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1 -; SINK-AFTER-NEXT: [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]] ; SINK-AFTER-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0 ; SINK-AFTER-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer ; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]] @@ -2939,12 +2935,11 @@ define i32 @sink_into_replication_region(i32 %y) { ; SINK-AFTER-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF27:![0-9]+]], !llvm.loop [[LOOP28:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP25]]) -; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP22]], i32 3 -; SINK-AFTER-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]], !prof [[PROF29:![0-9]+]] +; SINK-AFTER-NEXT: br label [[BB1:%.*]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB:%.*]] ] -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] -; SINK-AFTER-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP27]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] 
+; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[Y]], [[BB:%.*]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB]] ] +; SINK-AFTER-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ] ; SINK-AFTER-NEXT: br label [[BB2:%.*]] ; SINK-AFTER: bb1: ; SINK-AFTER-NEXT: [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ] @@ -2957,7 +2952,7 @@ define i32 @sink_into_replication_region(i32 %y) { ; SINK-AFTER-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]] ; SINK-AFTER-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1 ; SINK-AFTER-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2 -; SINK-AFTER-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF30:![0-9]+]], !llvm.loop [[LOOP31:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF29:![0-9]+]], !llvm.loop [[LOOP30:![0-9]+]] ; bb: br label %bb2 @@ -2991,7 +2986,6 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 8 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] ; UNROLL-NO-IC-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1 -; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] @@ -3150,17 +3144,16 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) ; UNROLL-NO-IC-NEXT: [[TMP74:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP74]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF27]], !llvm.loop [[LOOP32:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP74]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF27]], !llvm.loop [[LOOP31:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP73]], [[TMP72]] ; UNROLL-NO-IC-NEXT: [[TMP75:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP43]], i32 3 -; UNROLL-NO-IC-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]], !prof [[PROF29]] +; UNROLL-NO-IC-NEXT: br label [[BB1:%.*]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB:%.*]] ] -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] -; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP75]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[Y]], [[BB:%.*]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[BB]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB]] ] +; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ] ; UNROLL-NO-IC-NEXT: br label [[BB2:%.*]] ; UNROLL-NO-IC: bb1: ; UNROLL-NO-IC-NEXT: [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[TMP75]], [[MIDDLE_BLOCK]] ] @@ -3177,7 +3170,7 @@ define i32 
@sink_into_replication_region_multiple(ptr %x, i32 %y) { ; UNROLL-NO-IC-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1 ; UNROLL-NO-IC-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 ; UNROLL-NO-IC-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2 -; UNROLL-NO-IC-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF30]], !llvm.loop [[LOOP33:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF29]], !llvm.loop [[LOOP32:![0-9]+]] ; ; UNROLL-NO-VF-LABEL: @sink_into_replication_region_multiple( ; UNROLL-NO-VF-NEXT: bb: @@ -3190,7 +3183,6 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2 ; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] ; UNROLL-NO-VF-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1 -; UNROLL-NO-VF-NEXT: [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-VF: vector.body: ; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] @@ -3236,15 +3228,15 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = select i1 [[TMP5]], i32 [[TMP11]], i32 [[VEC_PHI1]] ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-VF-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF27]], !llvm.loop [[LOOP32:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF27]], !llvm.loop [[LOOP31:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP17]], [[TMP16]] -; UNROLL-NO-VF-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]], !prof [[PROF29]] +; UNROLL-NO-VF-NEXT: br label [[BB1:%.*]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB:%.*]] ] -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] -; UNROLL-NO-VF-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[Y]], [[BB:%.*]] ] +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[BB]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB]] ] +; UNROLL-NO-VF-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ] ; UNROLL-NO-VF-NEXT: br label [[BB2:%.*]] ; UNROLL-NO-VF: bb1: ; UNROLL-NO-VF-NEXT: [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ] @@ -3261,7 +3253,7 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; UNROLL-NO-VF-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1 ; UNROLL-NO-VF-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 ; UNROLL-NO-VF-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2 -; UNROLL-NO-VF-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF30]], !llvm.loop [[LOOP33:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF29]], !llvm.loop [[LOOP32:![0-9]+]] ; ; SINK-AFTER-LABEL: @sink_into_replication_region_multiple( ; SINK-AFTER-NEXT: bb: @@ -3274,7 +3266,6 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem 
i32 [[N_RND_UP]], 4 ; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] ; SINK-AFTER-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1 -; SINK-AFTER-NEXT: [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]] ; SINK-AFTER-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0 ; SINK-AFTER-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]] @@ -3359,16 +3350,15 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; SINK-AFTER-NEXT: [[TMP38:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; SINK-AFTER-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF27]], !llvm.loop [[LOOP32:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF27]], !llvm.loop [[LOOP31:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[TMP39:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP37]]) -; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP22]], i32 3 -; SINK-AFTER-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]], !prof [[PROF29]] +; SINK-AFTER-NEXT: br label [[BB1:%.*]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB:%.*]] ] -; SINK-AFTER-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] -; SINK-AFTER-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP39]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] +; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[Y]], [[BB:%.*]] ] +; SINK-AFTER-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[BB]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB]] ] +; SINK-AFTER-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ] ; SINK-AFTER-NEXT: br label [[BB2:%.*]] ; SINK-AFTER: bb1: ; SINK-AFTER-NEXT: [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[TMP39]], [[MIDDLE_BLOCK]] ] @@ -3385,7 +3375,7 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; SINK-AFTER-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1 ; SINK-AFTER-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 ; SINK-AFTER-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2 -; SINK-AFTER-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF30]], !llvm.loop [[LOOP33:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF29]], !llvm.loop [[LOOP32:![0-9]+]] ; bb: br label %bb2 @@ -3435,7 +3425,7 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4) ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 -; UNROLL-NO-IC-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = 
extractelement <4 x i32> [[TMP3]], i32 3 @@ -3457,7 +3447,7 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; UNROLL-NO-IC-NEXT: [[EXT:%.*]] = zext i1 [[B3]] to i32 ; UNROLL-NO-IC-NEXT: [[A_GEP:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[IV]] ; UNROLL-NO-IC-NEXT: store i32 0, ptr [[A_GEP]], align 4 -; UNROLL-NO-IC-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP35:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP34:![0-9]+]] ; UNROLL-NO-IC: for.end: ; UNROLL-NO-IC-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], [[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-IC-NEXT: ret i32 [[FOR_LCSSA]] @@ -3485,7 +3475,7 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; UNROLL-NO-VF-NEXT: store i32 0, ptr [[TMP9]], align 4 ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 -; UNROLL-NO-VF-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: @@ -3505,7 +3495,7 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; UNROLL-NO-VF-NEXT: [[EXT:%.*]] = zext i1 [[B3]] to i32 ; UNROLL-NO-VF-NEXT: [[A_GEP:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[IV]] ; UNROLL-NO-VF-NEXT: store i32 0, ptr [[A_GEP]], align 4 -; UNROLL-NO-VF-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP35:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP34:![0-9]+]] ; UNROLL-NO-VF: for.end: ; UNROLL-NO-VF-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], [[LOOP]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-VF-NEXT: ret i32 [[FOR_LCSSA]] @@ -3530,7 +3520,7 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4) ; SINK-AFTER-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 -; SINK-AFTER-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2 ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3 @@ -3552,7 +3542,7 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; SINK-AFTER-NEXT: [[EXT:%.*]] = zext i1 [[B3]] to i32 ; SINK-AFTER-NEXT: [[A_GEP:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[IV]] ; SINK-AFTER-NEXT: store i32 0, ptr [[A_GEP]], align 4 -; SINK-AFTER-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP35:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP34:![0-9]+]] ; SINK-AFTER: for.end: ; SINK-AFTER-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], [[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] ; SINK-AFTER-NEXT: ret i32 [[FOR_LCSSA]] @@ -3598,7 +3588,7 @@ define void @unused_recurrence(ptr %a) { ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x 
i16> [[STEP_ADD]], splat (i16 4) ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 -; UNROLL-NO-IC-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3 ; UNROLL-NO-IC-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -3613,7 +3603,7 @@ define void @unused_recurrence(ptr %a) { ; UNROLL-NO-IC-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 ; UNROLL-NO-IC-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5 ; UNROLL-NO-IC-NEXT: [[CMP:%.*]] = icmp eq i16 [[IV]], 1000 -; UNROLL-NO-IC-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] ; UNROLL-NO-IC: for.end: ; UNROLL-NO-IC-NEXT: ret void ; @@ -3632,7 +3622,7 @@ define void @unused_recurrence(ptr %a) { ; UNROLL-NO-VF-NEXT: [[TMP2]] = add i16 [[TMP1]], 5 ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1028 -; UNROLL-NO-VF-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: @@ -3646,7 +3636,7 @@ define void @unused_recurrence(ptr %a) { ; UNROLL-NO-VF-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 ; UNROLL-NO-VF-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5 ; UNROLL-NO-VF-NEXT: [[CMP:%.*]] = icmp eq i16 [[IV]], 1000 -; UNROLL-NO-VF-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] ; UNROLL-NO-VF: for.end: ; UNROLL-NO-VF-NEXT: ret void ; @@ -3664,7 +3654,7 @@ define void @unused_recurrence(ptr %a) { ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4) ; SINK-AFTER-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1028 -; SINK-AFTER-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3 ; SINK-AFTER-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -3679,7 +3669,7 @@ define void @unused_recurrence(ptr %a) { ; SINK-AFTER-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 ; SINK-AFTER-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5 ; SINK-AFTER-NEXT: [[CMP:%.*]] = icmp eq i16 [[IV]], 1000 -; SINK-AFTER-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] ; SINK-AFTER: for.end: ; SINK-AFTER-NEXT: ret void ; @@ -3707,21 +3697,20 @@ define i32 @recurence_uniform_load(ptr %src, ptr noalias %dst) { ; UNROLL-NO-IC: vector.ph: ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: 
-; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC:%.*]], align 4 ; UNROLL-NO-IC-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; UNROLL-NO-IC: middle.block: -; UNROLL-NO-IC-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; UNROLL-NO-IC-NEXT: br label [[EXIT:%.*]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 8, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP0]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] ; UNROLL-NO-IC: loop: ; UNROLL-NO-IC-NEXT: [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ] ; UNROLL-NO-IC-NEXT: [[RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD:%.*]], [[LOOP]] ] ; UNROLL-NO-IC-NEXT: [[ADD]] = add i64 [[PHI]], 1 -; UNROLL-NO-IC-NEXT: [[LOAD]] = load i32, ptr [[SRC]], align 4 +; UNROLL-NO-IC-NEXT: [[LOAD]] = load i32, ptr [[SRC:%.*]], align 4 ; UNROLL-NO-IC-NEXT: [[ICMP:%.*]] = icmp ult i64 [[PHI]], 1 -; UNROLL-NO-IC-NEXT: br i1 [[ICMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP38:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[ICMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP37:![0-9]+]] ; UNROLL-NO-IC: exit: ; UNROLL-NO-IC-NEXT: ret i32 0 ; @@ -3745,7 +3734,7 @@ define i32 @recurence_uniform_load(ptr %src, ptr noalias %dst) { ; UNROLL-NO-VF-NEXT: [[ADD]] = add i64 [[PHI]], 1 ; UNROLL-NO-VF-NEXT: [[LOAD]] = load i32, ptr [[SRC]], align 4 ; UNROLL-NO-VF-NEXT: [[ICMP:%.*]] = icmp ult i64 [[PHI]], 1 -; UNROLL-NO-VF-NEXT: br i1 [[ICMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP38:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[ICMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP37:![0-9]+]] ; UNROLL-NO-VF: exit: ; UNROLL-NO-VF-NEXT: ret i32 0 ; @@ -3755,21 +3744,20 @@ define i32 @recurence_uniform_load(ptr %src, ptr noalias %dst) { ; SINK-AFTER: vector.ph: ; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]] ; SINK-AFTER: vector.body: -; SINK-AFTER-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC:%.*]], align 4 ; SINK-AFTER-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; SINK-AFTER: middle.block: -; SINK-AFTER-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; SINK-AFTER-NEXT: br label [[EXIT:%.*]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP0]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; SINK-AFTER-NEXT: br label [[LOOP:%.*]] ; SINK-AFTER: loop: ; SINK-AFTER-NEXT: [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ] ; SINK-AFTER-NEXT: [[RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD:%.*]], [[LOOP]] ] ; SINK-AFTER-NEXT: [[ADD]] = add i64 [[PHI]], 1 -; SINK-AFTER-NEXT: [[LOAD]] = load i32, ptr [[SRC]], align 4 +; SINK-AFTER-NEXT: [[LOAD]] = load i32, ptr [[SRC:%.*]], align 4 ; SINK-AFTER-NEXT: [[ICMP:%.*]] = icmp ult i64 [[PHI]], 1 -; SINK-AFTER-NEXT: br i1 [[ICMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP38:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[ICMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP37:![0-9]+]] ; SINK-AFTER: exit: ; SINK-AFTER-NEXT: ret i32 0 ; diff --git 
a/llvm/test/Transforms/LoopVectorize/loop-form.ll b/llvm/test/Transforms/LoopVectorize/loop-form.ll index 4a9380b3f35e8..c9b3ccf2d820c 100644 --- a/llvm/test/Transforms/LoopVectorize/loop-form.ll +++ b/llvm/test/Transforms/LoopVectorize/loop-form.ll @@ -81,9 +81,9 @@ define void @bottom_tested(ptr %p, i32 %n) { ; TAILFOLD-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; TAILFOLD-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TAILFOLD: middle.block: -; TAILFOLD-NEXT: br i1 true, label [[IF_END:%.*]], label [[SCALAR_PH]] +; TAILFOLD-NEXT: br label [[IF_END:%.*]] ; TAILFOLD: scalar.ph: -; TAILFOLD-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; TAILFOLD-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] ; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] ; TAILFOLD: for.cond: ; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ] diff --git a/llvm/test/Transforms/LoopVectorize/optsize.ll b/llvm/test/Transforms/LoopVectorize/optsize.ll index 738f265b89d65..6e1d14fe247ef 100644 --- a/llvm/test/Transforms/LoopVectorize/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/optsize.ll @@ -700,9 +700,9 @@ define void @stride1(ptr noalias %B, i32 %BStride) optsize { ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1026 ; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br i1 true, label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; CHECK-NEXT: br label %[[FOR_END:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1026, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[FOR_BODY]]: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -749,9 +749,9 @@ define void @stride1(ptr noalias %B, i32 %BStride) optsize { ; PGSO-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1026 ; PGSO-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; PGSO: [[MIDDLE_BLOCK]]: -; PGSO-NEXT: br i1 true, label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; PGSO-NEXT: br label %[[FOR_END:.*]] ; PGSO: [[SCALAR_PH]]: -; PGSO-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1026, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; PGSO-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ] ; PGSO-NEXT: br label %[[FOR_BODY:.*]] ; PGSO: [[FOR_BODY]]: ; PGSO-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -798,9 +798,9 @@ define void @stride1(ptr noalias %B, i32 %BStride) optsize { ; NPGSO-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1026 ; NPGSO-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] ; NPGSO: [[MIDDLE_BLOCK]]: -; NPGSO-NEXT: br i1 true, label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; NPGSO-NEXT: br label %[[FOR_END:.*]] ; NPGSO: [[SCALAR_PH]]: -; NPGSO-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1026, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; NPGSO-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ] ; NPGSO-NEXT: br label %[[FOR_BODY:.*]] ; NPGSO: [[FOR_BODY]]: ; NPGSO-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] diff --git 
a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll index e6926934ef42d..b878a93d4bb07 100644 --- a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll +++ b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll @@ -119,9 +119,9 @@ define void @pr45679(ptr %A) optsize { ; VF2UF2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; VF2UF2-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VF2UF2: middle.block: -; VF2UF2-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; VF2UF2-NEXT: br label [[EXIT:%.*]] ; VF2UF2: scalar.ph: -; VF2UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; VF2UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] ; VF2UF2-NEXT: br label [[LOOP:%.*]] ; VF2UF2: loop: ; VF2UF2-NEXT: [[RIV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[RIVPLUS1:%.*]], [[LOOP]] ] @@ -176,9 +176,9 @@ define void @pr45679(ptr %A) optsize { ; VF1UF4-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; VF1UF4-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VF1UF4: middle.block: -; VF1UF4-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; VF1UF4-NEXT: br label [[EXIT:%.*]] ; VF1UF4: scalar.ph: -; VF1UF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; VF1UF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] ; VF1UF4-NEXT: br label [[LOOP:%.*]] ; VF1UF4: loop: ; VF1UF4-NEXT: [[RIV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[RIVPLUS1:%.*]], [[LOOP]] ] @@ -323,9 +323,9 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; VF2UF2-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; VF2UF2-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VF2UF2: middle.block: -; VF2UF2-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; VF2UF2-NEXT: br label [[FOR_END:%.*]] ; VF2UF2: scalar.ph: -; VF2UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; VF2UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; VF2UF2-NEXT: br label [[FOR_BODY:%.*]] ; VF2UF2: for.body: ; VF2UF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -385,9 +385,9 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; VF1UF4-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; VF1UF4-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VF1UF4: middle.block: -; VF1UF4-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; VF1UF4-NEXT: br label [[FOR_END:%.*]] ; VF1UF4: scalar.ph: -; VF1UF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; VF1UF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; VF1UF4-NEXT: br label [[FOR_BODY:%.*]] ; VF1UF4: for.body: ; VF1UF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll index f4f33b17ce48f..d4a6aed472832 100644 --- a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll +++ 
b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll @@ -23,7 +23,6 @@ define void @test(i16 %x, i64 %y, ptr %ptr) { ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP2]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[INC]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll index 8e132ed8399cd..288993b6be826 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll @@ -61,7 +61,7 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] ; CHECK: .lr.ph: @@ -171,7 +171,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] ; CHECK: .lr.ph: @@ -265,7 +265,7 @@ define i32 @reduction_sum_const(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] ; CHECK: .lr.ph: @@ -376,7 +376,7 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] ; CHECK: .lr.ph: @@ -489,7 +489,7 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP47]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] ; CHECK: .lr.ph: @@ -599,7 +599,7 @@ define i32 @reduction_mul(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; 
CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] ; CHECK: .lr.ph: @@ -707,7 +707,7 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: @@ -813,7 +813,7 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: @@ -919,7 +919,7 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: @@ -1025,7 +1025,7 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: @@ -1133,7 +1133,7 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: @@ -1222,7 +1222,7 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: @@ -1309,7 +1309,7 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: @@ -1495,7 +1495,7 @@ define i8 
@reduction_add_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP30]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[TMP33:%.*]] = trunc <4 x i32> [[TMP32]] to <4 x i8> ; CHECK-NEXT: [[TMP34:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[TMP33]]) -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] ; CHECK: .lr.ph: @@ -1590,7 +1590,7 @@ define i8 @reduction_and_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP31:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP29]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[TMP32:%.*]] = trunc <4 x i32> [[TMP31]] to <4 x i8> ; CHECK-NEXT: [[TMP33:%.*]] = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> [[TMP32]]) -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] ; CHECK: .lr.ph: diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll index 279e0be3c8312..7f81a672478bb 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll @@ -350,7 +350,7 @@ define i32 @predicated(ptr noalias nocapture %A) { ; CHECK-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP104]], [[TMP101]] ; CHECK-NEXT: [[BIN_RDX37:%.*]] = add i32 [[TMP107]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX38:%.*]] = add i32 [[TMP110]], [[BIN_RDX37]] -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] ; CHECK: .lr.ph: @@ -589,7 +589,7 @@ define i32 @cond_rdx_pred(i32 %cond, ptr noalias %a, i64 %N) { ; CHECK-NEXT: [[BIN_RDX:%.*]] = mul i32 [[TMP112]], [[TMP109]] ; CHECK-NEXT: [[BIN_RDX39:%.*]] = mul i32 [[TMP115]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX40:%.*]] = mul i32 [[TMP118]], [[BIN_RDX39]] -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: diff --git a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll index f95be1a221e73..a587e4c8b6412 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll @@ -61,7 +61,7 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP25]]) -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] ; CHECK: .lr.ph: @@ -164,7 +164,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP45:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP43]]) -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] ; CHECK: scalar.ph: ; 
CHECK-NEXT: br label [[DOTLR_PH:%.*]] ; CHECK: .lr.ph: @@ -270,7 +270,7 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP42]]) -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] ; CHECK: .lr.ph: @@ -375,7 +375,7 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP42]]) -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: @@ -480,7 +480,7 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP42]]) -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: @@ -585,7 +585,7 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP42]]) -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: @@ -690,7 +690,7 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP44:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP42]]) -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: @@ -795,7 +795,7 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP44:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP42]]) -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: @@ -883,7 +883,7 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP25]]) -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; 
CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: @@ -969,7 +969,7 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP25]]) -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: diff --git a/llvm/test/Transforms/LoopVectorize/select-reduction.ll b/llvm/test/Transforms/LoopVectorize/select-reduction.ll index 545c595e116fa..41010cd84d5a8 100644 --- a/llvm/test/Transforms/LoopVectorize/select-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/select-reduction.ll @@ -18,7 +18,6 @@ define i32 @test(i64 %N, i32 %x) { ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[EXTRA_ITER]], 1 -; CHECK-NEXT: [[IND_END:%.*]] = sub i64 [[EXTRA_ITER]], [[N_VEC]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -39,8 +38,8 @@ define i32 @test(i64 %N, i32 %x) { ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP3]]) ; CHECK-NEXT: br label [[EXIT_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[EXTRA_ITER]], [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[EXTRA_ITER]], [[LOOP_PREHEADER]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[NEXT:%.*]] = phi i32 [ [[SEL:%.*]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/store-reduction-results-in-tail-folded-loop.ll b/llvm/test/Transforms/LoopVectorize/store-reduction-results-in-tail-folded-loop.ll index 07911e5ec0d75..bf86cbd601f44 100644 --- a/llvm/test/Transforms/LoopVectorize/store-reduction-results-in-tail-folded-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/store-reduction-results-in-tail-folded-loop.ll @@ -20,7 +20,6 @@ define void @pr75298_store_reduction_value_in_folded_loop(i64 %iv.start) optsize ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP0]], 1 -; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[IV_START]], [[N_VEC]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/strict-fadd-interleave-only.ll b/llvm/test/Transforms/LoopVectorize/strict-fadd-interleave-only.ll index 61e60741f0376..6e94d3bfca4d0 100644 --- a/llvm/test/Transforms/LoopVectorize/strict-fadd-interleave-only.ll +++ b/llvm/test/Transforms/LoopVectorize/strict-fadd-interleave-only.ll @@ -59,10 +59,10 @@ define float @pr70988() { ; CHECK-ALM-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT3]], 1022 ; 
CHECK-ALM-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-ALM: middle.block: -; CHECK-ALM-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-ALM-NEXT: br label [[EXIT:%.*]] ; CHECK-ALM: scalar.ph: -; CHECK-ALM-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1022, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-ALM-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; CHECK-ALM-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] +; CHECK-ALM-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ] ; CHECK-ALM-NEXT: br label [[LOOP:%.*]] ; CHECK-ALM: loop: ; CHECK-ALM-NEXT: [[INDEX:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ] @@ -182,10 +182,10 @@ define float @pr72720reduction_using_active_lane_mask(ptr %src) { ; CHECK-ALM-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; CHECK-ALM-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-ALM: middle.block: -; CHECK-ALM-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-ALM-NEXT: br label [[EXIT:%.*]] ; CHECK-ALM: scalar.ph: -; CHECK-ALM-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-ALM-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP11]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; CHECK-ALM-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] +; CHECK-ALM-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ] ; CHECK-ALM-NEXT: br label [[LOOP:%.*]] ; CHECK-ALM: loop: ; CHECK-ALM-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NARROW:%.*]], [[LOOP]] ] @@ -240,10 +240,10 @@ define float @fadd_reduction_with_live_in(float %inc) { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1002 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1002, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -277,10 +277,10 @@ define float @fadd_reduction_with_live_in(float %inc) { ; CHECK-ALM-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1002 ; CHECK-ALM-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-ALM: middle.block: -; CHECK-ALM-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-ALM-NEXT: br label [[EXIT:%.*]] ; CHECK-ALM: scalar.ph: -; CHECK-ALM-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1002, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-ALM-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; CHECK-ALM-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] +; CHECK-ALM-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ] ; CHECK-ALM-NEXT: br label [[LOOP:%.*]] ; CHECK-ALM: loop: ; CHECK-ALM-NEXT: [[IV:%.*]] = phi i32 [ 
[[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll index 31bf18a4cbd69..29de2c70dadc3 100644 --- a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll @@ -53,7 +53,7 @@ define i32 @test(ptr %vf1, i64 %n) { ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 204 ; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br label %[[exit:.*]] +; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[FOR_BODY:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll index 336c5ea4331b8..c41d318157005 100644 --- a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll @@ -53,9 +53,9 @@ define void @VF1-VPlanExe(ptr %dst) { ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void @@ -88,7 +88,6 @@ define void @VF1-VPWidenCanonicalIVRecipeExe(ptr %ptr1) { ; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds double, ptr [[PTR1:%.*]], i64 15 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PTR1]], i64 128 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE12:%.*]] ] @@ -133,9 +132,9 @@ define void @VF1-VPWidenCanonicalIVRecipeExe(ptr %ptr1) { ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PTR1]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[PTR1]], [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll index 85b44a7076d1b..8d066b8f4b0aa 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll @@ -164,7 +164,6 @@ define void @remove_loop_region_with_replicate_recipe(ptr %dst, i64 range(i64 5, ; VF8UF1-NEXT: 
[[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 8 ; VF8UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; VF8UF1-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP0]], 1 -; VF8UF1-NEXT: [[TMP1:%.*]] = add i64 2, [[N_VEC]] ; VF8UF1-NEXT: br label %[[VECTOR_BODY:.*]] ; VF8UF1: [[VECTOR_BODY]]: ; VF8UF1-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 @@ -228,9 +227,9 @@ define void @remove_loop_region_with_replicate_recipe(ptr %dst, i64 range(i64 5, ; VF8UF1: [[PRED_STORE_CONTINUE14]]: ; VF8UF1-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; VF8UF1: [[MIDDLE_BLOCK]]: -; VF8UF1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; VF8UF1-NEXT: br label %[[EXIT:.*]] ; VF8UF1: [[SCALAR_PH]]: -; VF8UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ] +; VF8UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, %[[ENTRY]] ] ; VF8UF1-NEXT: br label %[[LOOP:.*]] ; VF8UF1: [[LOOP]]: ; VF8UF1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -252,7 +251,6 @@ define void @remove_loop_region_with_replicate_recipe(ptr %dst, i64 range(i64 5, ; VF8UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 16 ; VF8UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; VF8UF2-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP0]], 1 -; VF8UF2-NEXT: [[TMP1:%.*]] = add i64 2, [[N_VEC]] ; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF8UF2: [[VECTOR_BODY]]: ; VF8UF2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 @@ -373,9 +371,9 @@ define void @remove_loop_region_with_replicate_recipe(ptr %dst, i64 range(i64 5, ; VF8UF2: [[PRED_STORE_CONTINUE30]]: ; VF8UF2-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; VF8UF2: [[MIDDLE_BLOCK]]: -; VF8UF2-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; VF8UF2-NEXT: br label %[[EXIT:.*]] ; VF8UF2: [[SCALAR_PH]]: -; VF8UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ] +; VF8UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, %[[ENTRY]] ] ; VF8UF2-NEXT: br label %[[LOOP:.*]] ; VF8UF2: [[LOOP]]: ; VF8UF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -397,7 +395,6 @@ define void @remove_loop_region_with_replicate_recipe(ptr %dst, i64 range(i64 5, ; VF16UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 16 ; VF16UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; VF16UF1-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP0]], 1 -; VF16UF1-NEXT: [[TMP1:%.*]] = add i64 2, [[N_VEC]] ; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]] ; VF16UF1: [[VECTOR_BODY]]: ; VF16UF1-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 @@ -517,9 +514,9 @@ define void @remove_loop_region_with_replicate_recipe(ptr %dst, i64 range(i64 5, ; VF16UF1: [[PRED_STORE_CONTINUE30]]: ; VF16UF1-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; VF16UF1: [[MIDDLE_BLOCK]]: -; VF16UF1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; VF16UF1-NEXT: br label %[[EXIT:.*]] ; VF16UF1: [[SCALAR_PH]]: -; VF16UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ] +; VF16UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, %[[ENTRY]] ] ; VF16UF1-NEXT: br label %[[LOOP:.*]] ; VF16UF1: [[LOOP]]: ; VF16UF1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -727,7 +724,6 @@ define void @scev_expand_step(i64 
%x, ptr %dst) { ; VF8UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 8 ; VF8UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; VF8UF1-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP1]], 1 -; VF8UF1-NEXT: [[TMP2:%.*]] = mul i64 [[N_VEC]], [[STEP]] ; VF8UF1-NEXT: br label %[[VECTOR_BODY:.*]] ; VF8UF1: [[VECTOR_BODY]]: ; VF8UF1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 @@ -815,9 +811,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) { ; VF8UF1: [[PRED_STORE_CONTINUE14]]: ; VF8UF1-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; VF8UF1: [[MIDDLE_BLOCK]]: -; VF8UF1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; VF8UF1-NEXT: br label %[[EXIT:.*]] ; VF8UF1: [[SCALAR_PH]]: -; VF8UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; VF8UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; VF8UF1-NEXT: br label %[[LOOP:.*]] ; VF8UF1: [[LOOP]]: ; VF8UF1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -844,7 +840,6 @@ define void @scev_expand_step(i64 %x, ptr %dst) { ; VF8UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 16 ; VF8UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; VF8UF2-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP1]], 1 -; VF8UF2-NEXT: [[TMP2:%.*]] = mul i64 [[N_VEC]], [[STEP]] ; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF8UF2: [[VECTOR_BODY]]: ; VF8UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 @@ -1013,9 +1008,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) { ; VF8UF2: [[PRED_STORE_CONTINUE30]]: ; VF8UF2-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; VF8UF2: [[MIDDLE_BLOCK]]: -; VF8UF2-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; VF8UF2-NEXT: br label %[[EXIT:.*]] ; VF8UF2: [[SCALAR_PH]]: -; VF8UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; VF8UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; VF8UF2-NEXT: br label %[[LOOP:.*]] ; VF8UF2: [[LOOP]]: ; VF8UF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -1042,7 +1037,6 @@ define void @scev_expand_step(i64 %x, ptr %dst) { ; VF16UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 16 ; VF16UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; VF16UF1-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP1]], 1 -; VF16UF1-NEXT: [[TMP2:%.*]] = mul i64 [[N_VEC]], [[STEP]] ; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]] ; VF16UF1: [[VECTOR_BODY]]: ; VF16UF1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 @@ -1210,9 +1204,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) { ; VF16UF1: [[PRED_STORE_CONTINUE30]]: ; VF16UF1-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; VF16UF1: [[MIDDLE_BLOCK]]: -; VF16UF1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; VF16UF1-NEXT: br label %[[EXIT:.*]] ; VF16UF1: [[SCALAR_PH]]: -; VF16UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; VF16UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; VF16UF1-NEXT: br label %[[LOOP:.*]] ; VF16UF1: [[LOOP]]: ; VF16UF1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll index 
aa05bb153966e..5e05717185a03 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
@@ -262,7 +262,6 @@ define void @uniform_gep(i64 %k, ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT: Successor(s): vector.ph
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector.ph:
-; CHECK-NEXT:   vp<[[END:%.+]]> = DERIVED-IV ir<21> + vp<[[VEC_TC]]> * ir<1>
 ; CHECK-NEXT:   CLONE ir<%gep.A.uniform> = getelementptr inbounds ir<%A>, ir<0>
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
@@ -1047,7 +1046,6 @@ define void @merge_with_dead_gep_between_regions(i32 %n, ptr noalias %src, ptr n
 ; CHECK-NEXT: Successor(s): vector.ph
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector.ph:
-; CHECK-NEXT:   vp<[[END:%.+]]> = DERIVED-IV ir<%n> + vp<[[VEC_TC]]> * ir<-1>
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
@@ -1084,23 +1082,7 @@ define void @merge_with_dead_gep_between_regions(i32 %n, ptr noalias %src, ptr n
 ; CHECK-NEXT: Successor(s): middle.block
 ; CHECK-EMPTY:
 ; CHECK-NEXT: middle.block:
-; CHECK-NEXT:   EMIT branch-on-cond ir<true>
-; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
-; CHECK-EMPTY:
-; CHECK-NEXT: scalar.ph
-; CHECK-NEXT:   EMIT vp<[[RESUME:%.+]]> = resume-phi vp<[[END]]>, ir<%n>
-; CHECK-NEXT: Successor(s): ir-bb<loop>
-; CHECK-EMPTY:
-; CHECK-NEXT: ir-bb<loop>:
-; CHECK-NEXT:   IR %iv = phi i32 [ %n, %entry ], [ %iv.next, %loop ] (extra operand: vp<[[RESUME]]> from scalar.ph)
-; CHECK-NEXT:   IR %iv.next = add nsw i32 %iv, -1
-; CHECK-NEXT:   IR %gep.src = getelementptr inbounds i32, ptr %src, i32 %iv
-; CHECK-NEXT:   IR %l = load i32, ptr %gep.src, align 16
-; CHECK-NEXT:   IR %dead_gep = getelementptr inbounds i32, ptr %dst, i64 1
-; CHECK-NEXT:   IR %gep.dst = getelementptr inbounds i32, ptr %dst, i32 %iv
-; CHECK-NEXT:   IR store i32 %l, ptr %gep.dst, align 16
-; CHECK-NEXT:   IR %ec = icmp eq i32 %iv.next, 0
-; CHECK-NEXT: No successors
+; CHECK-NEXT: Successor(s): ir-bb<exit>
 ; CHECK-EMPTY:
 ; CHECK-NEXT: ir-bb<exit>
 ; CHECK-NEXT: No successors
From 19f1666f152c36930ce99d942dedb937f85f83ed Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Thu, 13 Mar 2025 21:30:51 +0000
Subject: [PATCH 3/7] !fixup update after recent changes.
---
 .../Transforms/Vectorize/LoopVectorize.cpp    |  9 ++--
 llvm/lib/Transforms/Vectorize/VPlan.h         | 16 +++++--
 .../Transforms/Vectorize/VPlanTransforms.cpp  | 46 +++++++++----------
 .../LoopVectorize/AArch64/optsize_minsize.ll  | 20 ++++----
 .../LoopVectorize/ARM/optsize_minsize.ll      |  8 ++--
 .../vectorize-force-tail-with-evl-div.ll      | 16 +++----
 ...ce-tail-with-evl-fixed-order-recurrence.ll | 16 ++++++-
 ...an-vp-intrinsics-fixed-order-recurrence.ll |  4 +-
 8 files changed, 75 insertions(+), 60 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 7e9747c565efd..cef9088d7a051 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2502,13 +2502,13 @@ void InnerLoopVectorizer::introduceCheckBlockInVPlan(BasicBlock *CheckIRBB) {
   PreVectorPH->swapSuccessors();
 
   // We just connected a new block to the scalar preheader. Update all
-  // ResumePhis by adding an incoming value for it.
+  // ResumePhis by adding an incoming value for it, reusing the last value.
   for (VPRecipeBase &R : *cast<VPBasicBlock>(ScalarPH)) {
     auto *ResumePhi = dyn_cast<VPInstruction>(&R);
     if (!ResumePhi || ResumePhi->getOpcode() != VPInstruction::ResumePhi)
       continue;
     ResumePhi->addOperand(
-        ResumePhi->getOperand(ScalarPH->getNumPredecessors() == 1 ?
0 : 1));
+        ResumePhi->getOperand(ResumePhi->getNumOperands() - 1));
   }
 }
@@ -7570,9 +7570,8 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
   // properly model costs for such loops.
   auto ExitBlocks = BestPlan.getExitBlocks();
   bool PlanForEarlyExitLoop =
-      std::distance(ExitBlocks.begin(), ExitBlocks.end()) > 2 ||
-      (std::distance(ExitBlocks.begin(), ExitBlocks.end()) == 1 &&
-       (*ExitBlocks.begin())->getNumPredecessors() > 1);
+      ExitBlocks.size() > 1 ||
+      (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
   assert((BestFactor.Width == LegacyVF.Width || PlanForEarlyExitLoop ||
          planContainsAdditionalSimplifications(getPlanFor(BestFactor.Width),
                                                CostCtx, OrigLoop) ||
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 43dc30c40bb53..a534b4e3b7e28 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3529,12 +3529,18 @@ class VPlan {
   /// Returns the 'middle' block of the plan, that is the block that selects
   /// whether to execute the scalar tail loop or the exit block from the loop
-  /// latch.
-  const VPBasicBlock *getMiddleBlock() const {
-    return cast<VPBasicBlock>(getScalarPreheader()->getPredecessors().front());
-  }
+  /// latch. If the scalar tail loop or exit block is known to always execute,
+  /// the middle block may branch directly to that block.
   VPBasicBlock *getMiddleBlock() {
-    return cast<VPBasicBlock>(getScalarPreheader()->getPredecessors().front());
+    if (!getScalarPreheader()->getPredecessors().empty())
+      return cast<VPBasicBlock>(
+          getScalarPreheader()->getPredecessors().front());
+    if (getExitBlocks().size() == 1)
+      return cast<VPBasicBlock>(getExitBlocks()[0]->getPredecessors().front());
+    return nullptr;
+  }
+  const VPBasicBlock *getMiddleBlock() const {
+    return const_cast<VPlan *>(this)->getMiddleBlock();
   }
 
   /// Return the VPBasicBlock for the preheader of the scalar loop.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index a5e5e719f4ed1..b98be796862ff 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1467,45 +1467,45 @@ void VPlanTransforms::truncateToMinimalBitwidths(
          "some entries in MinBWs haven't been processed");
 }
-/// Remove BranchOnCond recipes with constant conditions together with removing
+/// Remove BranchOnCond recipes with true conditions together with removing
 /// dead edges to their successors.
 static void simplifyCFG(VPlan &Plan) {
   using namespace llvm::VPlanPatternMatch;
   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock *>(
            vp_depth_first_deep(Plan.getEntry()))) {
-    if (VPBB->getNumSuccessors() != 2 || VPBB->begin() == VPBB->end() ||
+    if (VPBB->getNumSuccessors() != 2 ||
         !match(&VPBB->back(), m_BranchOnCond(m_True())))
       continue;
-    VPBasicBlock *DeadSucc = cast<VPBasicBlock>(VPBB->getSuccessors()[1]);
-    const auto &Preds = DeadSucc->getPredecessors();
+    VPBasicBlock *RemovedSucc = cast<VPBasicBlock>(VPBB->getSuccessors()[1]);
+    const auto &Preds = RemovedSucc->getPredecessors();
     unsigned DeadIdx = std::distance(Preds.begin(), find(Preds, VPBB));
-    // Remove values coming from VPBB from phi-like recipes in DeadSucc.
-    for (VPRecipeBase &R : make_early_inc_range(*DeadSucc)) {
+    // Remove values coming from VPBB from phi-like recipes in RemovedSucc.
+    for (VPRecipeBase &R : make_early_inc_range(*RemovedSucc)) {
       assert((!isa<VPIRInstruction>(&R) ||
               !isa<PHINode>(cast<VPIRInstruction>(&R)->getInstruction())) &&
              !isa<VPHeaderPHIRecipe>(&R) &&
             "Cannot update VPIRInstructions wrapping phis or header phis yet");
       auto *VPI = dyn_cast<VPInstruction>(&R);
-      if (VPI && VPI->getOpcode() == VPInstruction::ResumePhi) {
-        VPBuilder B(VPI);
-        SmallVector<VPValue *> NewOps;
-        // Create new operand list, with the dead incoming value filtered out.
-        for (const auto &[Idx, Op] : enumerate(VPI->operands())) {
-          if (Idx == DeadIdx)
-            continue;
-          NewOps.push_back(Op);
-        }
-        VPI->replaceAllUsesWith(B.createNaryOp(VPInstruction::ResumePhi, NewOps,
-                                               VPI->getDebugLoc(),
-                                               VPI->getName()));
-        VPI->eraseFromParent();
+      if (!VPI || VPI->getOpcode() != VPInstruction::ResumePhi)
+        break;
+      VPBuilder B(VPI);
+      SmallVector<VPValue *> NewOps;
+      // Create new operand list, with the dead incoming value filtered out.
+      for (const auto &[Idx, Op] : enumerate(VPI->operands())) {
+        if (Idx == DeadIdx)
+          continue;
+        NewOps.push_back(Op);
       }
+      VPI->replaceAllUsesWith(B.createNaryOp(VPInstruction::ResumePhi, NewOps,
+                                             VPI->getDebugLoc(),
+                                             VPI->getName()));
+      VPI->eraseFromParent();
     }
-    // Disconnect blocks and remove the terminator. DeadSucc will be deleted
-    // automatically on VPlan descruction.
-    VPBlockUtils::disconnectBlocks(VPBB, DeadSucc);
+    // Disconnect blocks and remove the terminator. RemovedSucc will be deleted
+    // automatically on VPlan destruction.
+    VPBlockUtils::disconnectBlocks(VPBB, RemovedSucc);
     VPBB->back().eraseFromParent();
   }
 }
@@ -1519,7 +1519,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
   runPass(legalizeAndOptimizeInductions, Plan);
   runPass(removeRedundantExpandSCEVRecipes, Plan);
   runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType());
-  simplifyCFG(Plan);
+  runPass(simplifyCFG, Plan);
   runPass(removeDeadRecipes, Plan);
 
   runPass(createAndOptimizeReplicateRegions, Plan);
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
index 291b8f3348f09..9315ca4540d7f 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
@@ -404,9 +404,9 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n)
 ; DEFAULT-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; DEFAULT-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; DEFAULT:       [[MIDDLE_BLOCK]]:
-; DEFAULT-NEXT:    br i1 true, label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]]
+; DEFAULT-NEXT:    br label %[[FOR_COND_CLEANUP:.*]]
 ; DEFAULT:       [[SCALAR_PH]]:
-; DEFAULT-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; DEFAULT-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
 ; DEFAULT-NEXT:    br label %[[FOR_BODY:.*]]
 ; DEFAULT:       [[FOR_BODY]]:
 ; DEFAULT-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
@@ -599,9 +599,9 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n)
 ; OPTSIZE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; OPTSIZE-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; OPTSIZE:       [[MIDDLE_BLOCK]]:
-; OPTSIZE-NEXT:    br i1 true, label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]]
+; OPTSIZE-NEXT:    br label %[[FOR_COND_CLEANUP:.*]]
 ; OPTSIZE:       [[SCALAR_PH]]:
-; OPTSIZE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+;
OPTSIZE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; OPTSIZE-NEXT: br label %[[FOR_BODY:.*]] ; OPTSIZE: [[FOR_BODY]]: ; OPTSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] @@ -728,9 +728,9 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; DEFAULT-NEXT: [[TMP25:%.*]] = extractelement [[TMP24]], i32 0 ; DEFAULT-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: -; DEFAULT-NEXT: br i1 true, label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]] +; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; DEFAULT: [[SCALAR_PH]]: -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; DEFAULT-NEXT: br label %[[FOR_BODY:.*]] ; DEFAULT: [[FOR_BODY]]: ; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] @@ -805,9 +805,9 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; OPTSIZE-NEXT: [[TMP25:%.*]] = extractelement [[TMP24]], i32 0 ; OPTSIZE-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; OPTSIZE: [[MIDDLE_BLOCK]]: -; OPTSIZE-NEXT: br i1 true, label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]] +; OPTSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; OPTSIZE: [[SCALAR_PH]]: -; OPTSIZE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; OPTSIZE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; OPTSIZE-NEXT: br label %[[FOR_BODY:.*]] ; OPTSIZE: [[FOR_BODY]]: ; OPTSIZE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] @@ -882,9 +882,9 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; MINSIZE-NEXT: [[TMP25:%.*]] = extractelement [[TMP24]], i32 0 ; MINSIZE-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; MINSIZE: [[MIDDLE_BLOCK]]: -; MINSIZE-NEXT: br i1 true, label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]] +; MINSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; MINSIZE: [[SCALAR_PH]]: -; MINSIZE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; MINSIZE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; MINSIZE-NEXT: br label %[[FOR_BODY:.*]] ; MINSIZE: [[FOR_BODY]]: ; MINSIZE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll index 02874d4457219..23f14b7a1011b 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll @@ -400,9 +400,9 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; DEFAULT-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: -; DEFAULT-NEXT: br i1 true, label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]] +; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; DEFAULT: [[SCALAR_PH]]: -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; 
DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; DEFAULT-NEXT: br label %[[FOR_BODY:.*]] ; DEFAULT: [[FOR_BODY]]: ; DEFAULT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] @@ -595,9 +595,9 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; OPTSIZE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; OPTSIZE-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; OPTSIZE: [[MIDDLE_BLOCK]]: -; OPTSIZE-NEXT: br i1 true, label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]] +; OPTSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; OPTSIZE: [[SCALAR_PH]]: -; OPTSIZE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; OPTSIZE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; OPTSIZE-NEXT: br label %[[FOR_BODY:.*]] ; OPTSIZE: [[FOR_BODY]]: ; OPTSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-div.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-div.ll index 0bfab2da51fa7..47294b702ae61 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-div.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-div.ll @@ -47,9 +47,9 @@ define void @test_sdiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -144,9 +144,9 @@ define void @test_udiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -240,9 +240,9 @@ define void @test_srem(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] 
] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -336,9 +336,9 @@ define void @test_urem(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: -; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll index 6d41d8590944c..0d6f4539ea0ad 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll @@ -208,6 +208,14 @@ define void @second_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[FOR_END:.*]] ; IF-EVL: [[SCALAR_PH]]: +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] +; IF-EVL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 33, %[[ENTRY]] ] +; IF-EVL-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi i32 [ 22, %[[ENTRY]] ] +; IF-EVL-NEXT: br label %[[FOR_BODY:.*]] +; IF-EVL: [[FOR_BODY]]: +; IF-EVL-NEXT: [[INDVARS:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_NEXT:%.*]], %[[FOR_BODY]] ] +; IF-EVL-NEXT: [[FOR1:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[TMP31:%.*]], %[[FOR_BODY]] ] +; IF-EVL-NEXT: [[FOR2:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT3]], %[[SCALAR_PH]] ], [ [[FOR1]], %[[FOR_BODY]] ] ; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDVARS]] ; IF-EVL-NEXT: [[TMP31]] = load i32, ptr [[ARRAYIDX]], align 4 ; IF-EVL-NEXT: [[ADD:%.*]] = add nsw i32 [[FOR1]], [[FOR2]] @@ -371,12 +379,16 @@ define void @third_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[FOR_END:.*]] ; IF-EVL: [[SCALAR_PH]]: +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] +; IF-EVL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 33, %[[ENTRY]] ] +; IF-EVL-NEXT: [[SCALAR_RECUR_INIT5:%.*]] = phi i32 [ 22, %[[ENTRY]] ] +; IF-EVL-NEXT: [[SCALAR_RECUR_INIT6:%.*]] = phi i32 [ 11, %[[ENTRY]] ] ; IF-EVL-NEXT: br label %[[FOR_BODY:.*]] ; IF-EVL: [[FOR_BODY]]: ; IF-EVL-NEXT: [[INDVARS:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_NEXT:%.*]], %[[FOR_BODY]] ] ; IF-EVL-NEXT: [[FOR1:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[TMP38:%.*]], %[[FOR_BODY]] ] -; IF-EVL-NEXT: [[FOR2:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT7]], %[[SCALAR_PH]] ], [ [[FOR1]], %[[FOR_BODY]] ] -; IF-EVL-NEXT: [[FOR3:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT8]], %[[SCALAR_PH]] ], [ [[FOR2]], %[[FOR_BODY]] ] +; IF-EVL-NEXT: [[FOR2:%.*]] = phi i32 [ 
[[SCALAR_RECUR_INIT5]], %[[SCALAR_PH]] ], [ [[FOR1]], %[[FOR_BODY]] ]
+; IF-EVL-NEXT:    [[FOR3:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT6]], %[[SCALAR_PH]] ], [ [[FOR2]], %[[FOR_BODY]] ]
 ; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDVARS]]
 ; IF-EVL-NEXT:    [[TMP38]] = load i32, ptr [[ARRAYIDX]], align 4
 ; IF-EVL-NEXT:    [[ADD:%.*]] = add nsw i32 [[FOR2]], [[FOR3]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll
index 0bcfe13832ae7..814f8d2f576ca 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll
@@ -48,9 +48,7 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
 ; IF-EVL-NEXT: Successor(s): middle.block
 ; IF-EVL-EMPTY:
 ; IF-EVL: middle.block:
-; IF-EVL-NEXT:   EMIT vp<[[RESUME_EXTRACT:%.+]]> = extract-from-end ir<[[LD]]>, ir<1>
-; IF-EVL-NEXT:   EMIT branch-on-cond ir<true>
-; IF-EVL-NEXT: Successor(s): ir-bb<for.end>, scalar.ph
+; IF-EVL-NEXT: Successor(s): ir-bb<for.end>
 
 entry:
   br label %for.body
From 641c647053eb04daecbe0324e5566fba29a50d72 Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Sun, 23 Mar 2025 16:45:44 +0000
Subject: [PATCH 4/7] !fixup address latest comments, thanks!
---
 .../Transforms/Vectorize/LoopVectorize.cpp    |   4 +
 llvm/lib/Transforms/Vectorize/VPlan.h         |  14 +-
 .../Transforms/Vectorize/VPlanTransforms.cpp  |  21 +-
 .../AArch64/conditional-branches-cost.ll      |  74 ++-
 .../AArch64/induction-costs-sve.ll            |  49 ++
 .../AArch64/masked-call-scalarize.ll          |  14 +-
 .../LoopVectorize/AArch64/masked-call.ll      | 568 ++++++++++--------
 .../AArch64/reduction-recurrence-costs-sve.ll |  26 +
 ...eave-to-widen-memory-remove-loop-region.ll |   4 +-
 .../LoopVectorize/RISCV/uniform-load-store.ll | 112 ++++
 ...orize-force-tail-with-evl-uniform-store.ll |   1 +
 .../X86/fp64_to_uint32-cost-model.ll          |   2 +-
 .../Transforms/LoopVectorize/X86/pr81872.ll   |   2 +-
 13 files changed, 624 insertions(+), 267 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 283d5837eb751..3034ada4f025e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2660,6 +2660,9 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
   LoopScalarPreHeader =
       SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT,
                  LI, nullptr, Twine(Prefix) + "scalar.ph");
+  // NOTE: The Plan's scalar preheader isn't replaced with a VPIRBasicBlock
+  // wrapping LoopScalarPreHeader here at the moment, because the Plan's scalar
+  // preheader may be unreachable at this point.
 }
 
 /// Return the expanded step for \p ID using \p ExpandedSCEVs to look up SCEV
@@ -8021,6 +8024,7 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() {
       Phi->removeIncomingValue(EPI.MemSafetyCheck);
   }
 
+  replaceVPBBWithIRVPBB(Plan.getScalarPreheader(), LoopScalarPreHeader);
   return LoopVectorPreHeader;
 }
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index bc84075241665..17d8418fcbb45 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3498,12 +3498,14 @@ class VPlan {
   /// latch. If the scalar tail loop or exit block is known to always execute,
   /// the middle block may branch directly to that block.
  VPBasicBlock *getMiddleBlock() {
-    if (!getScalarPreheader()->getPredecessors().empty())
-      return cast<VPBasicBlock>(
-          getScalarPreheader()->getPredecessors().front());
-    if (getExitBlocks().size() == 1)
-      return cast<VPBasicBlock>(getExitBlocks()[0]->getPredecessors().front());
-    return nullptr;
+    VPRegionBlock *LoopRegion = getVectorLoopRegion();
+    if (!LoopRegion)
+      return nullptr;
+    auto *RegionSucc = LoopRegion->getSingleSuccessor();
+    if (RegionSucc->getSingleSuccessor() ||
+        is_contained(RegionSucc->getSuccessors(), getScalarPreheader()))
+      return cast<VPBasicBlock>(RegionSucc);
+    return cast<VPBasicBlock>(RegionSucc->getSuccessors()[1]);
   }
   const VPBasicBlock *getMiddleBlock() const {
     return const_cast<VPlan *>(this)->getMiddleBlock();
   }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 4045a9189fc50..39586d97fa176 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1473,19 +1473,22 @@ void VPlanTransforms::truncateToMinimalBitwidths(
 /// Remove BranchOnCond recipes with true conditions together with removing
 /// dead edges to their successors.
-static void simplifyCFG(VPlan &Plan) {
+static void simplifyBranchOnCondTrue(VPlan &Plan) {
   using namespace llvm::VPlanPatternMatch;
   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock *>(
-           vp_depth_first_deep(Plan.getEntry()))) {
+           vp_depth_first_shallow(Plan.getEntry()))) {
     if (VPBB->getNumSuccessors() != 2 ||
         !match(&VPBB->back(), m_BranchOnCond(m_True())))
       continue;
     VPBasicBlock *RemovedSucc = cast<VPBasicBlock>(VPBB->getSuccessors()[1]);
     const auto &Preds = RemovedSucc->getPredecessors();
+    assert(count(Preds, VPBB) == 1 &&
+           "There must be a single edge between VPBB and its successor");
     unsigned DeadIdx = std::distance(Preds.begin(), find(Preds, VPBB));
-    // Remove values coming from VPBB from phi-like recipes in RemovedSucc.
+    // Values coming from VPBB into ResumePhi recipes of RemovedSucc are
+    // removed from these recipes.
     for (VPRecipeBase &R : make_early_inc_range(*RemovedSucc)) {
       assert((!isa<VPIRInstruction>(&R) ||
               !isa<PHINode>(cast<VPIRInstruction>(&R)->getInstruction())) &&
@@ -1495,20 +1498,20 @@ static void simplifyCFG(VPlan &Plan) {
       if (!VPI || VPI->getOpcode() != VPInstruction::ResumePhi)
         break;
       VPBuilder B(VPI);
-      SmallVector<VPValue *> NewOps;
+      SmallVector<VPValue *> NewOperands;
       // Create new operand list, with the dead incoming value filtered out.
       for (const auto &[Idx, Op] : enumerate(VPI->operands())) {
         if (Idx == DeadIdx)
           continue;
-        NewOps.push_back(Op);
+        NewOperands.push_back(Op);
       }
-      VPI->replaceAllUsesWith(B.createNaryOp(VPInstruction::ResumePhi, NewOps,
-                                             VPI->getDebugLoc(),
+      VPI->replaceAllUsesWith(B.createNaryOp(VPInstruction::ResumePhi,
+                                             NewOperands, VPI->getDebugLoc(),
                                              VPI->getName()));
       VPI->eraseFromParent();
     }
     // Disconnect blocks and remove the terminator. RemovedSucc will be deleted
-    // automatically on VPlan destruction.
+    // automatically on VPlan destruction if it becomes unreachable.
VPBlockUtils::disconnectBlocks(VPBB, RemovedSucc); VPBB->back().eraseFromParent(); } @@ -1523,7 +1526,7 @@ void VPlanTransforms::optimize(VPlan &Plan) { runPass(legalizeAndOptimizeInductions, Plan); runPass(removeRedundantExpandSCEVRecipes, Plan); runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType()); - runPass(simplifyCFG, Plan); + runPass(simplifyBranchOnCondTrue, Plan); runPass(removeDeadRecipes, Plan); runPass(createAndOptimizeReplicateRegions, Plan); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index bc8d5b1480c68..e86681a7e74dd 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -460,12 +460,18 @@ define void @latch_branch_cost(ptr %dst) { ; PRED-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 104 ; PRED-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: -; PRED-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; PRED-NEXT: br label %[[EXIT:.*]] ; PRED: [[SCALAR_PH]]: -; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 104, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; PRED-NEXT: br label %[[LOOP:.*]] ; PRED: [[LOOP]]: -; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ] +; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; PRED-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV]] +; PRED-NEXT: store i8 0, ptr [[GEP]], align 1 +; PRED-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; PRED-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 100 +; PRED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; PRED: [[EXIT]]: ; PRED-NEXT: ret void ; entry: @@ -606,6 +612,10 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt ; ; PRED-LABEL: define i32 @header_mask_and_invariant_compare( ; PRED-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], ptr [[E:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] { +; PRED-NEXT: [[ENTRY:.*]]: +; PRED-NEXT: br label %[[LOOP_HEADER:.*]] +; PRED: [[LOOP_HEADER]]: +; PRED-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; PRED-NEXT: [[L_A:%.*]] = load i32, ptr [[A]], align 4 ; PRED-NEXT: [[L_B:%.*]] = load i32, ptr [[B]], align 4 ; PRED-NEXT: [[OR:%.*]] = or i32 [[L_B]], [[L_A]] @@ -732,6 +742,16 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 { ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP10]]) ; PRED-NEXT: [[TMP16:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; PRED-NEXT: [[TMP17:%.*]] = extractelement [[TMP16]], i32 0 +; PRED-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; PRED: [[MIDDLE_BLOCK]]: +; PRED-NEXT: br label %[[EXIT:.*]] +; PRED: [[SCALAR_PH]]: +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[DST]], %[[ENTRY]] ] +; PRED-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 0, %[[ENTRY]] ] +; PRED-NEXT: br label %[[LOOP:.*]] +; PRED: [[LOOP]]: +; PRED-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ] +; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], 
%[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; PRED-NEXT: [[L:%.*]] = load i16, ptr [[SRC]], align 2 ; PRED-NEXT: [[O:%.*]] = or i16 [[L]], 1 ; PRED-NEXT: [[CONV:%.*]] = uitofp i16 [[O]] to double @@ -850,6 +870,14 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; DEFAULT: [[PRED_STORE_CONTINUE14]]: ; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; DEFAULT-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; DEFAULT: [[MIDDLE_BLOCK]]: +; DEFAULT-NEXT: br label %[[EXIT:.*]] +; DEFAULT: [[SCALAR_PH]]: +; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] +; DEFAULT-NEXT: br label %[[LOOP:.*]] +; DEFAULT: [[LOOP]]: +; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; DEFAULT-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i8 ; DEFAULT-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV]] ; DEFAULT-NEXT: store i8 [[IV_TRUNC]], ptr [[GEP]], align 1 @@ -944,6 +972,14 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; PRED: [[PRED_STORE_CONTINUE14]]: ; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; PRED-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; PRED: [[MIDDLE_BLOCK]]: +; PRED-NEXT: br label %[[EXIT:.*]] +; PRED: [[SCALAR_PH]]: +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] +; PRED-NEXT: br label %[[LOOP:.*]] +; PRED: [[LOOP]]: +; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; PRED-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i8 ; PRED-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV]] ; PRED-NEXT: store i8 [[IV_TRUNC]], ptr [[GEP]], align 1 @@ -1373,6 +1409,14 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; PRED-NEXT: [[TMP84:%.*]] = xor <8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; PRED-NEXT: [[TMP85:%.*]] = extractelement <8 x i1> [[TMP84]], i32 0 +; PRED-NEXT: br i1 [[TMP85]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; PRED: [[MIDDLE_BLOCK]]: +; PRED-NEXT: br label %[[EXIT:.*]] +; PRED: [[SCALAR_PH]]: +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; PRED-NEXT: br label %[[LOOP_HEADER:.*]] +; PRED: [[LOOP_HEADER]]: +; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; PRED-NEXT: [[TMP86:%.*]] = load float, ptr [[SRC_1]], align 4 ; PRED-NEXT: [[TMP87:%.*]] = load float, ptr [[SRC_2]], align 4 ; PRED-NEXT: [[MUL8_I_US:%.*]] = fmul float [[TMP87]], 0.000000e+00 @@ -1478,6 +1522,18 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) optsize { ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; DEFAULT-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24 +; DEFAULT-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; DEFAULT: [[MIDDLE_BLOCK]]: +; DEFAULT-NEXT: br label %[[EXIT:.*]] +; DEFAULT: [[SCALAR_PH]]: +; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] +; DEFAULT-NEXT: br label 
%[[LOOP_HEADER:.*]] +; DEFAULT: [[LOOP_HEADER]]: +; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; DEFAULT-NEXT: br i1 [[C]], label %[[LOOP_LATCH]], label %[[THEN:.*]] +; DEFAULT: [[THEN]]: +; DEFAULT-NEXT: br label %[[LOOP_LATCH]] +; DEFAULT: [[LOOP_LATCH]]: ; DEFAULT-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; DEFAULT-NEXT: [[T:%.*]] = trunc nuw nsw i64 [[IV_NEXT]] to i32 ; DEFAULT-NEXT: store i32 [[T]], ptr [[DST]], align 4 @@ -1529,6 +1585,18 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) optsize { ; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; PRED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24 +; PRED-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; PRED: [[MIDDLE_BLOCK]]: +; PRED-NEXT: br label %[[EXIT:.*]] +; PRED: [[SCALAR_PH]]: +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] +; PRED-NEXT: br label %[[LOOP_HEADER:.*]] +; PRED: [[LOOP_HEADER]]: +; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; PRED-NEXT: br i1 [[C]], label %[[LOOP_LATCH]], label %[[THEN:.*]] +; PRED: [[THEN]]: +; PRED-NEXT: br label %[[LOOP_LATCH]] +; PRED: [[LOOP_LATCH]]: ; PRED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; PRED-NEXT: [[T:%.*]] = trunc nuw nsw i64 [[IV_NEXT]] to i32 ; PRED-NEXT: store i32 [[T]], ptr [[DST]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll index 7298183027f40..6d9e9d4849289 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll @@ -139,6 +139,17 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; PRED-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[TMP26]], i32 0 ; PRED-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP23]], ptr [[TMP27]], i32 1, [[ACTIVE_LANE_MASK]]) ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] +; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP15]]) +; PRED-NEXT: [[TMP28:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; PRED-NEXT: [[TMP29:%.*]] = extractelement [[TMP28]], i32 0 +; PRED-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; PRED: [[MIDDLE_BLOCK]]: +; PRED-NEXT: br label %[[EXIT:.*]] +; PRED: [[SCALAR_PH]]: +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; PRED-NEXT: br label %[[LOOP:.*]] +; PRED: [[LOOP]]: +; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; PRED-NEXT: [[GEP_SRC:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV]] ; PRED-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC]], align 1 ; PRED-NEXT: [[L_EXT:%.*]] = zext i8 [[L]] to i32 @@ -303,6 +314,14 @@ define void @iv_trunc(i32 %x, ptr %dst, i64 %N) #0 { ; PRED-NEXT: [[TMP24:%.*]] = xor <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; PRED-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; PRED-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP24]], i32 0 +; PRED-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; PRED: [[MIDDLE_BLOCK]]: +; PRED-NEXT: br 
label %[[EXIT:.*]] +; PRED: [[SCALAR_PH]]: +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; PRED-NEXT: br label %[[FOR_BODY:.*]] +; PRED: [[FOR_BODY]]: +; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] ; PRED-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[IV]] to i32 ; PRED-NEXT: [[ADD_I:%.*]] = mul i32 [[MUL_X]], [[TRUNC_IV]] ; PRED-NEXT: [[IV_MUL:%.*]] = zext i32 [[ADD_I]] to i64 @@ -489,6 +508,16 @@ define void @trunc_ivs_and_store(i32 %x, ptr %dst, i64 %N) #0 { ; PRED-NEXT: [[TMP35:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; PRED-NEXT: [[TMP36:%.*]] = extractelement <4 x i1> [[TMP35]], i32 0 +; PRED-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; PRED: [[MIDDLE_BLOCK]]: +; PRED-NEXT: br label %[[EXIT:.*]] +; PRED: [[SCALAR_PH]]: +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; PRED-NEXT: [[BC_RESUME_VAL8:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; PRED-NEXT: br label %[[LOOP:.*]] +; PRED: [[LOOP]]: +; PRED-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ] +; PRED-NEXT: [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL8]], %[[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ] ; PRED-NEXT: [[IV_1_TRUNC:%.*]] = trunc i64 [[IV_1]] to i32 ; PRED-NEXT: [[IV_1_MUL:%.*]] = mul i32 [[MUL]], [[IV_1_TRUNC]] ; PRED-NEXT: [[IV_2_NEXT]] = add i32 [[IV_2]], 1 @@ -675,6 +704,16 @@ define void @ivs_trunc_and_ext(i32 %x, ptr %dst, i64 %N) #0 { ; PRED-NEXT: [[TMP34:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; PRED-NEXT: [[TMP35:%.*]] = extractelement <4 x i1> [[TMP34]], i32 0 +; PRED-NEXT: br i1 [[TMP35]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; PRED: [[MIDDLE_BLOCK]]: +; PRED-NEXT: br label %[[EXIT:.*]] +; PRED: [[SCALAR_PH]]: +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; PRED-NEXT: [[BC_RESUME_VAL7:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; PRED-NEXT: br label %[[LOOP:.*]] +; PRED: [[LOOP]]: +; PRED-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ] +; PRED-NEXT: [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL7]], %[[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ] ; PRED-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV_1]] to i32 ; PRED-NEXT: [[IV_MUL:%.*]] = mul i32 [[ADD]], [[IV_TRUNC]] ; PRED-NEXT: [[IV_2_NEXT]] = add i32 [[IV_2]], 1 @@ -806,6 +845,16 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) { ; PRED: [[PRED_STORE_CONTINUE5]]: ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; PRED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; PRED-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; PRED: [[MIDDLE_BLOCK]]: +; PRED-NEXT: br label %[[EXIT:.*]] +; PRED: [[SCALAR_PH]]: +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; PRED-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; PRED-NEXT: br label %[[LOOP:.*]] +; PRED: [[LOOP]]: +; PRED-NEXT: [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ] +; 
PRED-NEXT: [[IV_CONV:%.*]] = phi i64 [ [[BC_RESUME_VAL6]], %[[SCALAR_PH]] ], [ [[IV_EXT:%.*]], %[[LOOP]] ] ; PRED-NEXT: [[GEP:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[IV_CONV]], i32 2 ; PRED-NEXT: store i32 0, ptr [[GEP]], align 8 ; PRED-NEXT: [[IV_1_NEXT]] = add i32 [[IV_1]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll index 121a6ed53309e..aa144c25055cc 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll @@ -54,14 +54,11 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; ; TFCOMMON-LABEL: @test_widen_exp_v2( ; TFCOMMON-NEXT: entry: -; TFCOMMON-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], 1 -; TFCOMMON-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], 1 -; TFCOMMON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 2 -; TFCOMMON-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; TFCOMMON-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 2 -; TFCOMMON-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[TMP0]], 2 +; TFCOMMON-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0:%.*]], 1 +; TFCOMMON-NEXT: [[TMP1:%.*]] = sub i64 [[N_RND_UP]], 2 +; TFCOMMON-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[N_RND_UP]], 2 ; TFCOMMON-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i64 [[TMP1]], i64 0 -; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 0, i64 [[TMP0]]) +; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 0, i64 [[N_RND_UP]]) ; TFCOMMON-NEXT: br label [[LOOP:%.*]] ; TFCOMMON: vector.body: ; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] @@ -100,9 +97,6 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFA_INTERLEAVE-LABEL: @test_widen_exp_v2( ; TFA_INTERLEAVE-NEXT: entry: ; TFA_INTERLEAVE-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0:%.*]], 1 -; TFA_INTERLEAVE-NEXT: [[N_RND_UP1:%.*]] = add i64 [[N_RND_UP]], 3 -; TFA_INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP1]], 4 -; TFA_INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP1]], [[N_MOD_VF]] ; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = sub i64 [[N_RND_UP]], 4 ; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[N_RND_UP]], 4 ; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i64 [[TMP1]], i64 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll index 9545a825b9dda..144beef4da565 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll @@ -49,60 +49,66 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE: [[FOR_COND_CLEANUP]]: ; TFNONE-NEXT: ret void ; -; TFCOMMON-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; TFCOMMON-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 +; TFCOMMON-LABEL: define void @test_widen( +; TFCOMMON-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0:[0-9]+]] { +; TFCOMMON-NEXT: [[ENTRY:.*]]: +; TFCOMMON-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; TFCOMMON-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFCOMMON-NEXT: br label %[[VECTOR_BODY:.*]] ; TFCOMMON: [[VECTOR_BODY]]: ; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 
[ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] -; TFCOMMON-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] -; TFCOMMON-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; TFCOMMON-NEXT: [[TMP6:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK]]) -; TFCOMMON-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP6]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) -; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; TFCOMMON-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] +; TFCOMMON-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP2]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; TFCOMMON-NEXT: [[TMP3:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK]]) +; TFCOMMON-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] +; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP3]], ptr [[TMP4]], i32 8, [[ACTIVE_LANE_MASK]]) +; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFCOMMON-NEXT: [[TMP8:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFCOMMON-NEXT: [[TMP9:%.*]] = extractelement [[TMP8]], i32 0 -; TFCOMMON-NEXT: br i1 [[TMP9]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; TFCOMMON-NEXT: [[TMP5:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; TFCOMMON-NEXT: [[TMP6:%.*]] = extractelement [[TMP5]], i32 0 +; TFCOMMON-NEXT: br i1 [[TMP6]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TFCOMMON: [[FOR_COND_CLEANUP]]: ; TFCOMMON-NEXT: ret void ; -; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 -; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 +; TFA_INTERLEAVE-LABEL: define void @test_widen( +; TFA_INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0:[0-9]+]] { +; TFA_INTERLEAVE-NEXT: [[ENTRY:.*]]: +; TFA_INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) -; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025) +; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP3]], i64 1025) ; TFA_INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] ; TFA_INTERLEAVE: [[VECTOR_BODY]]: ; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ 
[[ACTIVE_LANE_MASK_NEXT4:%.*]], %[[VECTOR_BODY]] ] -; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] -; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[TMP7]], i64 [[TMP9]] -; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP10]], i32 8, [[ACTIVE_LANE_MASK2]], poison) -; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK]]) -; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD3]], [[ACTIVE_LANE_MASK2]]) -; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] +; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] +; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP4]], i64 [[TMP6]] +; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP4]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK2]], poison) +; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK]]) +; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD3]], [[ACTIVE_LANE_MASK2]]) +; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] +; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 [[TMP12]] +; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP8]], ptr [[TMP10]], i32 8, [[ACTIVE_LANE_MASK]]) +; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP9]], ptr [[TMP13]], i32 8, [[ACTIVE_LANE_MASK2]]) +; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() ; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i64 [[TMP15]] -; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP11]], ptr [[TMP13]], i32 8, [[ACTIVE_LANE_MASK]]) -; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP12]], ptr [[TMP16]], i32 8, [[ACTIVE_LANE_MASK2]]) -; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] -; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = add i64 [[INDEX_NEXT]], [[TMP18]] +; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = add i64 [[INDEX_NEXT]], [[TMP15]] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP19]], i64 1025) -; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = extractelement [[TMP20]], i32 0 -; 
TFA_INTERLEAVE-NEXT: br i1 [[TMP21]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP16]], i64 1025) +; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = extractelement [[TMP17]], i32 0 +; TFA_INTERLEAVE-NEXT: br i1 [[TMP18]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TFA_INTERLEAVE: [[FOR_COND_CLEANUP]]: ; TFA_INTERLEAVE-NEXT: ret void ; @@ -174,46 +180,75 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE: [[FOR_COND_CLEANUP]]: ; TFNONE-NEXT: ret void ; -; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 -; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 +; TFCOMMON-LABEL: define void @test_if_then( +; TFCOMMON-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] { +; TFCOMMON-NEXT: [[ENTRY:.*]]: +; TFCOMMON-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; TFCOMMON-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) +; TFCOMMON-NEXT: br label %[[VECTOR_BODY:.*]] +; TFCOMMON: [[VECTOR_BODY]]: +; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] +; TFCOMMON-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] +; TFCOMMON-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP2]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; TFCOMMON-NEXT: [[TMP3:%.*]] = icmp ugt [[WIDE_MASKED_LOAD]], splat (i64 50) +; TFCOMMON-NEXT: [[TMP4:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP3]], zeroinitializer +; TFCOMMON-NEXT: [[TMP5:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[TMP4]]) +; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select [[TMP4]], [[TMP5]], zeroinitializer +; TFCOMMON-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] +; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP6]], i32 8, [[ACTIVE_LANE_MASK]]) +; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] +; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) +; TFCOMMON-NEXT: [[TMP7:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; TFCOMMON-NEXT: [[TMP8:%.*]] = extractelement [[TMP7]], i32 0 +; TFCOMMON-NEXT: br i1 [[TMP8]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; TFCOMMON: [[FOR_COND_CLEANUP]]: +; TFCOMMON-NEXT: ret void +; +; TFA_INTERLEAVE-LABEL: define void @test_if_then( +; TFA_INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] { +; TFA_INTERLEAVE-NEXT: [[ENTRY:.*]]: +; TFA_INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) -; TFA_INTERLEAVE-NEXT: 
[[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025) +; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP3]], i64 1025) ; TFA_INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] ; TFA_INTERLEAVE: [[VECTOR_BODY]]: ; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT5:%.*]], %[[VECTOR_BODY]] ] -; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 [[TMP9]] -; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP10]], i32 8, [[ACTIVE_LANE_MASK2]], poison) -; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = icmp ugt [[WIDE_MASKED_LOAD]], splat (i64 50) -; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = icmp ugt [[WIDE_MASKED_LOAD3]], splat (i64 50) -; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP11]], zeroinitializer -; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = select [[ACTIVE_LANE_MASK2]], [[TMP12]], zeroinitializer -; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[TMP13]]) -; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD3]], [[TMP14]]) -; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select [[TMP13]], [[TMP15]], zeroinitializer -; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select [[TMP14]], [[TMP16]], zeroinitializer -; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] +; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] +; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 [[TMP6]] +; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP4]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK2]], poison) +; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = icmp ugt [[WIDE_MASKED_LOAD]], splat (i64 50) +; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = icmp ugt [[WIDE_MASKED_LOAD3]], splat (i64 50) +; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP8]], zeroinitializer +; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = select [[ACTIVE_LANE_MASK2]], [[TMP9]], zeroinitializer +; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[TMP10]]) +; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD3]], [[TMP11]]) +; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select [[TMP10]], [[TMP12]], zeroinitializer +; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select [[TMP11]], [[TMP13]], zeroinitializer +; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] +; TFA_INTERLEAVE-NEXT: 
[[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP14]], i64 [[TMP16]] +; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP14]], i32 8, [[ACTIVE_LANE_MASK]]) +; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI4]], ptr [[TMP17]], i32 8, [[ACTIVE_LANE_MASK2]]) +; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() ; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[TMP17]], i64 [[TMP19]] -; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP17]], i32 8, [[ACTIVE_LANE_MASK]]) -; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI4]], ptr [[TMP20]], i32 8, [[ACTIVE_LANE_MASK2]]) -; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] -; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP23:%.*]] = add i64 [[INDEX_NEXT]], [[TMP22]] +; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = add i64 [[INDEX_NEXT]], [[TMP19]] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT5]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP23]], i64 1025) -; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = extractelement [[TMP24]], i32 0 -; TFA_INTERLEAVE-NEXT: br i1 [[TMP25]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT5]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP20]], i64 1025) +; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = extractelement [[TMP21]], i32 0 +; TFA_INTERLEAVE-NEXT: br i1 [[TMP22]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; TFA_INTERLEAVE: [[FOR_COND_CLEANUP]]: ; TFA_INTERLEAVE-NEXT: ret void ; @@ -301,62 +336,84 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE: [[FOR_COND_CLEANUP]]: ; TFNONE-NEXT: ret void ; -; TFCOMMON-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; TFCOMMON-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 +; TFCOMMON-LABEL: define void @test_widen_if_then_else( +; TFCOMMON-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] { +; TFCOMMON-NEXT: [[ENTRY:.*]]: +; TFCOMMON-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; TFCOMMON-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFCOMMON-NEXT: br label %[[VECTOR_BODY:.*]] ; TFCOMMON: [[VECTOR_BODY]]: ; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] -; TFCOMMON-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; TFCOMMON-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; 
TFCOMMON-NEXT: [[TMP6:%.*]] = icmp ugt [[WIDE_MASKED_LOAD]], splat (i64 50) -; TFCOMMON-NEXT: [[TMP7:%.*]] = xor [[TMP6]], splat (i1 true) -; TFCOMMON-NEXT: [[TMP8:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP7]], zeroinitializer -; TFCOMMON-NEXT: [[TMP9:%.*]] = call @foo_vector( zeroinitializer, [[TMP8]]) -; TFCOMMON-NEXT: [[TMP10:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP6]], zeroinitializer -; TFCOMMON-NEXT: [[TMP11:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[TMP10]]) -; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select [[TMP8]], [[TMP9]], [[TMP11]] -; TFCOMMON-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] -; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP12]], i32 8, [[ACTIVE_LANE_MASK]]) -; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; TFCOMMON-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] +; TFCOMMON-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP2]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; TFCOMMON-NEXT: [[TMP3:%.*]] = icmp ugt [[WIDE_MASKED_LOAD]], splat (i64 50) +; TFCOMMON-NEXT: [[TMP4:%.*]] = xor [[TMP3]], splat (i1 true) +; TFCOMMON-NEXT: [[TMP5:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP4]], zeroinitializer +; TFCOMMON-NEXT: [[TMP6:%.*]] = call @foo_vector( zeroinitializer, [[TMP5]]) +; TFCOMMON-NEXT: [[TMP7:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP3]], zeroinitializer +; TFCOMMON-NEXT: [[TMP8:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[TMP7]]) +; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select [[TMP5]], [[TMP6]], [[TMP8]] +; TFCOMMON-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] +; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]]) +; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFCOMMON-NEXT: [[TMP13:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFCOMMON-NEXT: [[TMP14:%.*]] = extractelement [[TMP13]], i32 0 -; TFCOMMON-NEXT: br i1 [[TMP14]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; TFCOMMON-NEXT: [[TMP10:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; TFCOMMON-NEXT: [[TMP11:%.*]] = extractelement [[TMP10]], i32 0 +; TFCOMMON-NEXT: br i1 [[TMP11]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; TFCOMMON: [[FOR_COND_CLEANUP]]: ; TFCOMMON-NEXT: ret void ; -; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP10]], i32 8, [[ACTIVE_LANE_MASK2]], poison) -; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = icmp ugt [[WIDE_MASKED_LOAD]], splat (i64 50) -; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = icmp ugt [[WIDE_MASKED_LOAD3]], splat (i64 50) -; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = xor [[TMP11]], splat (i1 true) -; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = xor [[TMP12]], splat (i1 true) -; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP13]], zeroinitializer -; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = select [[ACTIVE_LANE_MASK2]], [[TMP14]], zeroinitializer -; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = call @foo_vector( zeroinitializer, [[TMP15]]) -; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = call @foo_vector( zeroinitializer, [[TMP16]]) -; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP11]], zeroinitializer -; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = select [[ACTIVE_LANE_MASK2]], 
[[TMP12]], zeroinitializer -; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[TMP19]]) -; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD3]], [[TMP20]]) -; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select [[TMP15]], [[TMP17]], [[TMP21]] -; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select [[TMP16]], [[TMP18]], [[TMP22]] -; TFA_INTERLEAVE-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] +; TFA_INTERLEAVE-LABEL: define void @test_widen_if_then_else( +; TFA_INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] { +; TFA_INTERLEAVE-NEXT: [[ENTRY:.*]]: +; TFA_INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) +; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP3]], i64 1025) +; TFA_INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] +; TFA_INTERLEAVE: [[VECTOR_BODY]]: +; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] +; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT5:%.*]], %[[VECTOR_BODY]] ] +; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] +; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 [[TMP6]] +; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP4]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK2]], poison) +; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = icmp ugt [[WIDE_MASKED_LOAD]], splat (i64 50) +; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = icmp ugt [[WIDE_MASKED_LOAD3]], splat (i64 50) +; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = xor [[TMP8]], splat (i1 true) +; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = xor [[TMP9]], splat (i1 true) +; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP10]], zeroinitializer +; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = select [[ACTIVE_LANE_MASK2]], [[TMP11]], zeroinitializer +; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = call @foo_vector( zeroinitializer, [[TMP12]]) +; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = call @foo_vector( zeroinitializer, [[TMP13]]) +; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP8]], zeroinitializer +; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = select [[ACTIVE_LANE_MASK2]], [[TMP9]], zeroinitializer +; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[TMP16]]) +; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD3]], [[TMP17]]) +; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select [[TMP12]], [[TMP14]], [[TMP18]] +; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select [[TMP13]], [[TMP15]], [[TMP19]] +; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] +; 
TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[TMP20]], i64 [[TMP22]] +; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP20]], i32 8, [[ACTIVE_LANE_MASK]]) +; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI4]], ptr [[TMP23]], i32 8, [[ACTIVE_LANE_MASK2]]) +; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64() ; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = mul i64 [[TMP24]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[TMP23]], i64 [[TMP25]] -; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP23]], i32 8, [[ACTIVE_LANE_MASK]]) -; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI4]], ptr [[TMP26]], i32 8, [[ACTIVE_LANE_MASK2]]) -; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] -; TFA_INTERLEAVE-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP28:%.*]] = mul i64 [[TMP27]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP29:%.*]] = add i64 [[INDEX_NEXT]], [[TMP28]] +; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = add i64 [[INDEX_NEXT]], [[TMP25]] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT5]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP29]], i64 1025) -; TFA_INTERLEAVE-NEXT: [[TMP30:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFA_INTERLEAVE-NEXT: [[TMP31:%.*]] = extractelement [[TMP30]], i32 0 -; TFA_INTERLEAVE-NEXT: br i1 [[TMP31]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT5]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP26]], i64 1025) +; TFA_INTERLEAVE-NEXT: [[TMP27:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; TFA_INTERLEAVE-NEXT: [[TMP28:%.*]] = extractelement [[TMP27]], i32 0 +; TFA_INTERLEAVE-NEXT: br i1 [[TMP28]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; TFA_INTERLEAVE: [[FOR_COND_CLEANUP]]: ; TFA_INTERLEAVE-NEXT: ret void ; @@ -563,80 +620,89 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE: [[FOR_COND_CLEANUP]]: ; TFNONE-NEXT: ret void ; -; TFALWAYS-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; TFALWAYS-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 +; TFALWAYS-LABEL: define void @test_widen_optmask( +; TFALWAYS-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] { +; TFALWAYS-NEXT: [[ENTRY:.*]]: +; TFALWAYS-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; TFALWAYS-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 ; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFALWAYS-NEXT: br label %[[VECTOR_BODY:.*]] ; TFALWAYS: [[VECTOR_BODY]]: ; TFALWAYS-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] -; TFALWAYS-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] -; TFALWAYS-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; 
TFALWAYS-NEXT: [[TMP6:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK]]) -; TFALWAYS-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; TFALWAYS-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP6]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) -; TFALWAYS-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; TFALWAYS-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] +; TFALWAYS-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP2]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; TFALWAYS-NEXT: [[TMP3:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK]]) +; TFALWAYS-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] +; TFALWAYS-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP3]], ptr [[TMP4]], i32 8, [[ACTIVE_LANE_MASK]]) +; TFALWAYS-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFALWAYS-NEXT: [[TMP8:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFALWAYS-NEXT: [[TMP9:%.*]] = extractelement [[TMP8]], i32 0 -; TFALWAYS-NEXT: br i1 [[TMP9]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; TFALWAYS-NEXT: [[TMP5:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; TFALWAYS-NEXT: [[TMP6:%.*]] = extractelement [[TMP5]], i32 0 +; TFALWAYS-NEXT: br i1 [[TMP6]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; TFALWAYS: [[FOR_COND_CLEANUP]]: ; TFALWAYS-NEXT: ret void ; -; TFFALLBACK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; TFFALLBACK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 +; TFFALLBACK-LABEL: define void @test_widen_optmask( +; TFFALLBACK-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] { +; TFFALLBACK-NEXT: [[ENTRY:.*]]: +; TFFALLBACK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; TFFALLBACK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 ; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFFALLBACK-NEXT: br label %[[VECTOR_BODY:.*]] ; TFFALLBACK: [[VECTOR_BODY]]: ; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] -; TFFALLBACK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] -; TFFALLBACK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; TFFALLBACK-NEXT: [[TMP6:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK]]) -; TFFALLBACK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; TFFALLBACK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP6]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) -; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; TFFALLBACK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] +; TFFALLBACK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP2]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; TFFALLBACK-NEXT: [[TMP3:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK]]) +; TFFALLBACK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] +; TFFALLBACK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP3]], ptr [[TMP4]], i32 8, [[ACTIVE_LANE_MASK]]) +; 
TFFALLBACK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFFALLBACK-NEXT: [[TMP8:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFFALLBACK-NEXT: [[TMP9:%.*]] = extractelement [[TMP8]], i32 0 -; TFFALLBACK-NEXT: br i1 [[TMP9]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; TFFALLBACK-NEXT: [[TMP5:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; TFFALLBACK-NEXT: [[TMP6:%.*]] = extractelement [[TMP5]], i32 0 +; TFFALLBACK-NEXT: br i1 [[TMP6]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; TFFALLBACK: [[FOR_COND_CLEANUP]]: ; TFFALLBACK-NEXT: ret void ; -; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 -; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 +; TFA_INTERLEAVE-LABEL: define void @test_widen_optmask( +; TFA_INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] { +; TFA_INTERLEAVE-NEXT: [[ENTRY:.*]]: +; TFA_INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) -; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025) +; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP3]], i64 1025) ; TFA_INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] ; TFA_INTERLEAVE: [[VECTOR_BODY]]: ; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT4:%.*]], %[[VECTOR_BODY]] ] -; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] -; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[TMP7]], i64 [[TMP9]] -; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP10]], i32 8, [[ACTIVE_LANE_MASK2]], poison) -; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK]]) -; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD3]], [[ACTIVE_LANE_MASK2]]) -; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] +; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] +; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP4]], i64 [[TMP6]] +; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call 
@llvm.masked.load.nxv2i64.p0(ptr [[TMP4]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK2]], poison) +; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK]]) +; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD3]], [[ACTIVE_LANE_MASK2]]) +; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] +; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 [[TMP12]] +; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP8]], ptr [[TMP10]], i32 8, [[ACTIVE_LANE_MASK]]) +; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP9]], ptr [[TMP13]], i32 8, [[ACTIVE_LANE_MASK2]]) +; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() ; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i64 [[TMP15]] -; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP11]], ptr [[TMP13]], i32 8, [[ACTIVE_LANE_MASK]]) -; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP12]], ptr [[TMP16]], i32 8, [[ACTIVE_LANE_MASK2]]) -; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] -; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = add i64 [[INDEX_NEXT]], [[TMP18]] +; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = add i64 [[INDEX_NEXT]], [[TMP15]] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP19]], i64 1025) -; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = extractelement [[TMP20]], i32 0 -; TFA_INTERLEAVE-NEXT: br i1 [[TMP21]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP16]], i64 1025) +; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = extractelement [[TMP17]], i32 0 +; TFA_INTERLEAVE-NEXT: br i1 [[TMP18]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; TFA_INTERLEAVE: [[FOR_COND_CLEANUP]]: ; TFA_INTERLEAVE-NEXT: ret void ; @@ -713,8 +779,11 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFNONE-NEXT: [[MULADD_LCSSA:%.*]] = phi double [ [[MULADD]], %[[FOR_BODY]] ], [ [[TMP11]], %[[MIDDLE_BLOCK]] ] ; TFNONE-NEXT: ret double [[MULADD_LCSSA]] ; -; TFALWAYS-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; TFALWAYS-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 +; TFALWAYS-LABEL: define double @test_widen_fmuladd_and_call( +; TFALWAYS-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], double [[M:%.*]]) #[[ATTR0]] { +; TFALWAYS-NEXT: [[ENTRY:.*]]: +; TFALWAYS-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; TFALWAYS-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 ; 
TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFALWAYS-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, double [[M]], i64 0 ; TFALWAYS-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -722,26 +791,29 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFALWAYS: [[VECTOR_BODY]]: ; TFALWAYS-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] -; TFALWAYS-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] -; TFALWAYS-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] -; TFALWAYS-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; TFALWAYS-NEXT: [[TMP6:%.*]] = fmul [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] -; TFALWAYS-NEXT: [[TMP7:%.*]] = fptoui [[WIDE_MASKED_LOAD]] to -; TFALWAYS-NEXT: [[TMP8:%.*]] = call @foo_vector( [[TMP7]], [[ACTIVE_LANE_MASK]]) -; TFALWAYS-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; TFALWAYS-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP8]], ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]]) -; TFALWAYS-NEXT: [[TMP10:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP6]], splat (double -0.000000e+00) -; TFALWAYS-NEXT: [[TMP11]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], [[TMP10]]) -; TFALWAYS-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; TFALWAYS-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; TFALWAYS-NEXT: [[TMP2:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] +; TFALWAYS-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr [[TMP2]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; TFALWAYS-NEXT: [[TMP3:%.*]] = fmul [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] +; TFALWAYS-NEXT: [[TMP4:%.*]] = fptoui [[WIDE_MASKED_LOAD]] to +; TFALWAYS-NEXT: [[TMP5:%.*]] = call @foo_vector( [[TMP4]], [[ACTIVE_LANE_MASK]]) +; TFALWAYS-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] +; TFALWAYS-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP5]], ptr [[TMP6]], i32 8, [[ACTIVE_LANE_MASK]]) +; TFALWAYS-NEXT: [[TMP7:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP3]], splat (double -0.000000e+00) +; TFALWAYS-NEXT: [[TMP8]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], [[TMP7]]) +; TFALWAYS-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFALWAYS-NEXT: [[TMP12:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFALWAYS-NEXT: [[TMP13:%.*]] = extractelement [[TMP12]], i32 0 -; TFALWAYS-NEXT: br i1 [[TMP13]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; TFALWAYS-NEXT: [[TMP9:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; TFALWAYS-NEXT: [[TMP10:%.*]] = extractelement [[TMP9]], i32 0 +; TFALWAYS-NEXT: br i1 [[TMP10]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; TFALWAYS: [[FOR_COND_CLEANUP]]: -; TFALWAYS-NEXT: ret double [[TMP11]] +; TFALWAYS-NEXT: ret double [[TMP8]] ; -; TFFALLBACK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; 
TFFALLBACK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 +; TFFALLBACK-LABEL: define double @test_widen_fmuladd_and_call( +; TFFALLBACK-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], double [[M:%.*]]) #[[ATTR0]] { +; TFFALLBACK-NEXT: [[ENTRY:.*]]: +; TFFALLBACK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; TFFALLBACK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 ; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFFALLBACK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, double [[M]], i64 0 ; TFFALLBACK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -749,30 +821,33 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFFALLBACK: [[VECTOR_BODY]]: ; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] -; TFFALLBACK-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] -; TFFALLBACK-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] -; TFFALLBACK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; TFFALLBACK-NEXT: [[TMP6:%.*]] = fmul [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] -; TFFALLBACK-NEXT: [[TMP7:%.*]] = fptoui [[WIDE_MASKED_LOAD]] to -; TFFALLBACK-NEXT: [[TMP8:%.*]] = call @foo_vector( [[TMP7]], [[ACTIVE_LANE_MASK]]) -; TFFALLBACK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; TFFALLBACK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP8]], ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]]) -; TFFALLBACK-NEXT: [[TMP10:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP6]], splat (double -0.000000e+00) -; TFFALLBACK-NEXT: [[TMP11]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], [[TMP10]]) -; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; TFFALLBACK-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; TFFALLBACK-NEXT: [[TMP2:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] +; TFFALLBACK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr [[TMP2]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; TFFALLBACK-NEXT: [[TMP3:%.*]] = fmul [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] +; TFFALLBACK-NEXT: [[TMP4:%.*]] = fptoui [[WIDE_MASKED_LOAD]] to +; TFFALLBACK-NEXT: [[TMP5:%.*]] = call @foo_vector( [[TMP4]], [[ACTIVE_LANE_MASK]]) +; TFFALLBACK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] +; TFFALLBACK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP5]], ptr [[TMP6]], i32 8, [[ACTIVE_LANE_MASK]]) +; TFFALLBACK-NEXT: [[TMP7:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP3]], splat (double -0.000000e+00) +; TFFALLBACK-NEXT: [[TMP8]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], [[TMP7]]) +; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFFALLBACK-NEXT: [[TMP12:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFFALLBACK-NEXT: [[TMP13:%.*]] = extractelement [[TMP12]], i32 0 -; TFFALLBACK-NEXT: br i1 [[TMP13]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; 
TFFALLBACK-NEXT: [[TMP9:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
+; TFFALLBACK-NEXT: [[TMP10:%.*]] = extractelement <vscale x 2 x i1> [[TMP9]], i32 0
+; TFFALLBACK-NEXT: br i1 [[TMP10]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; TFFALLBACK: [[FOR_COND_CLEANUP]]:
-; TFFALLBACK-NEXT: ret double [[TMP11]]
+; TFFALLBACK-NEXT: ret double [[TMP8]]
 ;
-; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
-; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
+; TFA_INTERLEAVE-LABEL: define double @test_widen_fmuladd_and_call(
+; TFA_INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], double [[M:%.*]]) #[[ATTR0]] {
+; TFA_INTERLEAVE-NEXT: [[ENTRY:.*]]:
+; TFA_INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
+; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025)
-; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025)
+; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP3]], i64 1025)
 ; TFA_INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[M]], i64 0
 ; TFA_INTERLEAVE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x double> [[BROADCAST_SPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
 ; TFA_INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -780,40 +855,40 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub
 ; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT4:%.*]], %[[VECTOR_BODY]] ]
-; TFA_INTERLEAVE-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP24:%.*]], %[[VECTOR_BODY]] ]
-; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]]
-; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2
-; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[TMP7]], i64 [[TMP9]]
-; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> poison)
-; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[TMP10]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x double> poison)
-; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = fmul <vscale x 2 x double> [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]]
-; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = fmul <vscale x 2 x double> [[WIDE_MASKED_LOAD3]], [[BROADCAST_SPLAT]]
-; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = fptoui <vscale x 2 x double> [[WIDE_MASKED_LOAD]] to <vscale x 2 x i64>
-; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = fptoui <vscale x 2 x double> [[WIDE_MASKED_LOAD3]] to <vscale x 2 x i64>
-; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[TMP13]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
-; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[TMP14]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
-; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
-; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2
-; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[TMP17]], i64 [[TMP19]]
-; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP15]], ptr [[TMP17]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
-; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP16]], ptr [[TMP20]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
-; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> [[TMP11]], <vscale x 2 x double> splat (double -0.000000e+00)
-; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], <vscale x 2 x double> [[TMP21]])
-; TFA_INTERLEAVE-NEXT: [[TMP23:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x double> [[TMP12]], <vscale x 2 x double> splat (double -0.000000e+00)
-; TFA_INTERLEAVE-NEXT: [[TMP24]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[TMP22]], <vscale x 2 x double> [[TMP23]])
-; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
-; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
-; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 2
-; TFA_INTERLEAVE-NEXT: [[TMP27:%.*]] = add i64 [[INDEX_NEXT]], [[TMP26]]
+; TFA_INTERLEAVE-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
+; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]]
+; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
+; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[TMP4]], i64 [[TMP6]]
+; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[TMP4]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> poison)
+; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x double> poison)
+; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = fmul <vscale x 2 x double> [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]]
+; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = fmul <vscale x 2 x double> [[WIDE_MASKED_LOAD3]], [[BROADCAST_SPLAT]]
+; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = fptoui <vscale x 2 x double> [[WIDE_MASKED_LOAD]] to <vscale x 2 x i64>
+; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = fptoui <vscale x 2 x double> [[WIDE_MASKED_LOAD3]] to <vscale x 2 x i64>
+; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[TMP10]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
+; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[TMP11]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
+; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
+; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
+; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2
+; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP14]], i64 [[TMP16]]
+; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP12]], ptr [[TMP14]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
+; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP13]], ptr [[TMP17]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
+; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> [[TMP8]], <vscale x 2 x double> splat (double -0.000000e+00)
+; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], <vscale x 2 x double> [[TMP18]])
+; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x double> [[TMP9]], <vscale x 2 x double> splat (double -0.000000e+00)
+; TFA_INTERLEAVE-NEXT: [[TMP21]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[TMP19]], <vscale x 2 x double> [[TMP20]])
+; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
+; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
+; TFA_INTERLEAVE-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 2
+; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = add i64 [[INDEX_NEXT]], [[TMP23]]
 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025)
-; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP27]], i64 1025)
-; TFA_INTERLEAVE-NEXT: [[TMP28:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
-; TFA_INTERLEAVE-NEXT: [[TMP29:%.*]] = extractelement <vscale x 2 x i1> [[TMP28]], i32 0
-; TFA_INTERLEAVE-NEXT: br i1 [[TMP29]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP24]], i64 1025)
+; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
+; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = extractelement <vscale x 2 x i1> [[TMP25]], i32 0
+; TFA_INTERLEAVE-NEXT: br i1 [[TMP26]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; TFA_INTERLEAVE: [[FOR_COND_CLEANUP]]:
-; TFA_INTERLEAVE-NEXT: ret double [[TMP24]]
+; TFA_INTERLEAVE-NEXT: ret double [[TMP21]]
 ;
 entry:
 br label %for.body
@@ -893,12 +968,35 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
 ; TFNONE: [[END]]:
 ; TFNONE-NEXT: ret void
 ;
+; TFCOMMON-LABEL: define void @test_widen_exp_v2(
+; TFCOMMON-SAME: ptr noalias [[P2:%.*]], ptr noalias [[P:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
+; TFCOMMON-NEXT: [[ENTRY:.*]]:
+; TFCOMMON-NEXT: br label %[[LOOP:.*]]
+; TFCOMMON: [[LOOP]]:
+; TFCOMMON-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; TFCOMMON-NEXT: [[LD:%.*]] = load double, ptr [[P2]], align 8
+; TFCOMMON-NEXT: [[EXP:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR7:[0-9]+]]
+; TFCOMMON-NEXT: [[COND1:%.*]] = fcmp ogt double [[EXP]], 0.000000e+00
+; TFCOMMON-NEXT: [[SINK:%.*]] = select i1 [[COND1]], double 0.000000e+00, double 1.000000e+00
+; TFCOMMON-NEXT: store double [[SINK]], ptr [[P]], align 8
+; TFCOMMON-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; TFCOMMON-NEXT: [[COND2:%.*]] = icmp eq i64 [[IV]], [[N]]
+; TFCOMMON-NEXT: br i1 [[COND2]], label %[[END:.*]], label %[[LOOP]]
+; TFCOMMON: [[END]]:
+; TFCOMMON-NEXT: ret void
+;
+; TFA_INTERLEAVE-LABEL: define void @test_widen_exp_v2(
+; TFA_INTERLEAVE-SAME: ptr noalias [[P2:%.*]], ptr noalias [[P:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
+; TFA_INTERLEAVE-NEXT: [[ENTRY:.*]]:
+; TFA_INTERLEAVE-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
+; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 2
+; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[TMP0]], 2
 ; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i64 [[TMP1]], i64 0
 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = icmp ult i64 0, [[TMP0]]
 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = icmp ult i64 1, [[TMP0]]
 ; TFA_INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]]
 ; TFA_INTERLEAVE: [[VECTOR_BODY]]:
-; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[TMP27:%.*]], %[[PRED_STORE_CONTINUE5:.*]] ]
+; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE5:.*]] ]
 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE5]] ]
 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT6:%.*]], %[[PRED_STORE_CONTINUE5]] ]
 ;
TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = load double, ptr [[P2]], align 8
@@ -907,8 +1005,8 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
 ; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7:[0-9]+]]
 ; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = fcmp ogt double [[TMP5]], 0.000000e+00
 ; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = xor i1 [[TMP6]], true
-; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = select i1 [[TMP7]], double 1.000000e+00, double 0.000000e+00
-; TFA_INTERLEAVE-NEXT: store double [[TMP24]], ptr [[P]], align 8
+; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP7]], double 1.000000e+00, double 0.000000e+00
+; TFA_INTERLEAVE-NEXT: store double [[PREDPHI]], ptr [[P]], align 8
 ; TFA_INTERLEAVE-NEXT: br label %[[PRED_STORE_CONTINUE]]
 ; TFA_INTERLEAVE: [[PRED_STORE_CONTINUE]]:
 ; TFA_INTERLEAVE-NEXT: br i1 [[ACTIVE_LANE_MASK2]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5]]
@@ -916,11 +1014,11 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
 ; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7]]
 ; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = fcmp ogt double [[TMP8]], 0.000000e+00
 ; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = xor i1 [[TMP9]], true
-; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = select i1 [[TMP10]], double 1.000000e+00, double 0.000000e+00
-; TFA_INTERLEAVE-NEXT: store double [[TMP26]], ptr [[P]], align 8
+; TFA_INTERLEAVE-NEXT: [[PREDPHI3:%.*]] = select i1 [[TMP10]], double 1.000000e+00, double 0.000000e+00
+; TFA_INTERLEAVE-NEXT: store double [[PREDPHI3]], ptr [[P]], align 8
 ; TFA_INTERLEAVE-NEXT: br label %[[PRED_STORE_CONTINUE5]]
 ; TFA_INTERLEAVE: [[PRED_STORE_CONTINUE5]]:
-; TFA_INTERLEAVE-NEXT: [[TMP27]] = add i64 [[INDEX]], 2
+; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
 ; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = icmp ult i64 [[INDEX]], [[TMP3]]
 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT6]] = icmp ult i64 [[TMP11]], [[TMP3]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
index 5aab03dd92807..370d96420fa52 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
@@ -254,6 +254,18 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
 ; PRED-NEXT: br i1 [[TMP43]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; PRED: [[MIDDLE_BLOCK]]:
 ; PRED-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> [[TMP41]])
+; PRED-NEXT: br label %[[EXIT:.*]]
+; PRED: [[SCALAR_PH]]:
+; PRED-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
+; PRED-NEXT: [[SCALAR_RECUR_INIT8:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
+; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
+; PRED-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
+; PRED-NEXT: br label %[[LOOP:.*]]
+; PRED: [[LOOP]]:
+; PRED-NEXT: [[TMP45:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[TMP53:%.*]], %[[LOOP]] ]
+; PRED-NEXT: [[SCALAR_RECUR10:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT8]], %[[SCALAR_PH]] ], [ [[TMP45]], %[[LOOP]] ]
+; PRED-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], %[[LOOP]] ]
+; PRED-NEXT: [[SUM_RED:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RED_2:%.*]], %[[LOOP]] ]
 ; PRED-NEXT: [[TMP52:%.*]] = add i64 [[Y]], 1
 ; PRED-NEXT: [[GEP_1:%.*]] = getelementptr i32, ptr [[SRC_1]], i64 [[TMP52]]
 ; PRED-NEXT: [[TMP53]] = load i32, ptr [[GEP_1]], align 4
@@ -479,6 +491,20 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 {
 ; PRED-NEXT: [[TMP21:%.*]] = or <vscale x 8 x i16> [[TMP20]], [[VEC_PHI]]
 ; PRED-NEXT: [[TMP16]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x i16> [[TMP21]], <vscale x 8 x i16> [[VEC_PHI]]
 ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]]
+; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP12]])
+; PRED-NEXT: [[TMP17:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
+; PRED-NEXT: [[TMP18:%.*]] = extractelement <vscale x 8 x i1> [[TMP17]], i32 0
+; PRED-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; PRED: [[MIDDLE_BLOCK]]:
+; PRED-NEXT: [[TMP19:%.*]] = call i16 @llvm.vector.reduce.or.nxv8i16(<vscale x 8 x i16> [[TMP16]])
+; PRED-NEXT: br label %[[EXIT:.*]]
+; PRED: [[SCALAR_PH]]:
+; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
+; PRED-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, %[[ENTRY]] ]
+; PRED-NEXT: br label %[[LOOP:.*]]
+; PRED: [[LOOP]]:
+; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; PRED-NEXT: [[RED:%.*]] = phi i16 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]
 ; PRED-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[IV]]
 ; PRED-NEXT: [[L:%.*]] = load i16, ptr [[GEP]], align 2
 ; PRED-NEXT: [[DIV:%.*]] = udiv i16 [[L]], [[X]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll
index a4812f900fa54..65bdbbfb1ce9a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll
@@ -113,9 +113,9 @@ define void @load_store_interleave_group_tc_2(ptr noalias %data) {
 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; VF4-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; VF4: [[MIDDLE_BLOCK]]:
-; VF4-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; VF4-NEXT: br label %[[EXIT:.*]]
 ; VF4: [[SCALAR_PH]]:
-; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
 ; VF4-NEXT: br label %[[LOOP:.*]]
 ; VF4: [[LOOP]]:
 ; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
index 584a0833fca29..99269ffc02de0 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
@@ -116,6 +116,14 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6
 ; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP8]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
 ; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
 ; TF-SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; TF-SCALABLE-NEXT: br i1 [[TMP9]], label 
%[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; TF-SCALABLE: [[MIDDLE_BLOCK]]:
+; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]]
+; TF-SCALABLE: [[SCALAR_PH]]:
+; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
+; TF-SCALABLE-NEXT: br label %[[FOR_BODY:.*]]
+; TF-SCALABLE: [[FOR_BODY]]:
+; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
 ; TF-SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8
 ; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
 ; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
@@ -143,6 +151,14 @@
 ; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]])
 ; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028
+; TF-FIXEDLEN-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; TF-FIXEDLEN: [[MIDDLE_BLOCK]]:
+; TF-FIXEDLEN-NEXT: br label %[[FOR_END:.*]]
+; TF-FIXEDLEN: [[SCALAR_PH]]:
+; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
+; TF-FIXEDLEN-NEXT: br label %[[FOR_BODY:.*]]
+; TF-FIXEDLEN: [[FOR_BODY]]:
+; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
 ; TF-FIXEDLEN-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8
 ; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
 ; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
@@ -448,6 +464,14 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
 ; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
 ; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
 ; TF-SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; TF-SCALABLE-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; TF-SCALABLE: [[MIDDLE_BLOCK]]:
+; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]]
+; TF-SCALABLE: [[SCALAR_PH]]:
+; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
+; TF-SCALABLE-NEXT: br label %[[FOR_BODY:.*]]
+; TF-SCALABLE: [[FOR_BODY]]:
+; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ]
 ; TF-SCALABLE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10
 ; TF-SCALABLE-NEXT: br i1 [[CMP]], label %[[DO_LOAD:.*]], label %[[LATCH]]
 ; TF-SCALABLE: [[DO_LOAD]]:
@@ -486,6 +510,14 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
 ; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028
+; TF-FIXEDLEN-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; TF-FIXEDLEN: [[MIDDLE_BLOCK]]:
+; TF-FIXEDLEN-NEXT: br label %[[FOR_END:.*]]
+; TF-FIXEDLEN: [[SCALAR_PH]]:
+; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
+; TF-FIXEDLEN-NEXT: br label %[[FOR_BODY:.*]]
+; TF-FIXEDLEN: [[FOR_BODY]]:
+; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ]
 ; TF-FIXEDLEN-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10
 ; TF-FIXEDLEN-NEXT: br i1 [[CMP]], label %[[DO_LOAD:.*]], label %[[LATCH]]
 ; TF-FIXEDLEN: [[DO_LOAD]]:
@@ -631,6 +663,14 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt
 ; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP8]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
 ; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
 ; TF-SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; TF-SCALABLE-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; TF-SCALABLE: [[MIDDLE_BLOCK]]:
+; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]]
+; TF-SCALABLE: [[SCALAR_PH]]:
+; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
+; TF-SCALABLE-NEXT: br label %[[FOR_BODY:.*]]
+; TF-SCALABLE: [[FOR_BODY]]:
+; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
 ; TF-SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 1
 ; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
 ; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
@@ -658,6 +698,14 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt
 ; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]])
 ; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028
+; TF-FIXEDLEN-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; TF-FIXEDLEN: [[MIDDLE_BLOCK]]:
+; TF-FIXEDLEN-NEXT: br label %[[FOR_END:.*]]
+; TF-FIXEDLEN: [[SCALAR_PH]]:
+; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
+; TF-FIXEDLEN-NEXT: br label %[[FOR_BODY:.*]]
+; TF-FIXEDLEN: [[FOR_BODY]]:
+; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
 ; TF-FIXEDLEN-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 1
 ; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
 ; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
@@ -790,6 +838,14 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
 ; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
 ; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
 ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; TF-SCALABLE-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; TF-SCALABLE: [[MIDDLE_BLOCK]]:
+; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]]
+; TF-SCALABLE: [[SCALAR_PH]]:
+; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
+; TF-SCALABLE-NEXT: br label %[[FOR_BODY:.*]]
+; TF-SCALABLE: [[FOR_BODY]]:
+; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
 ; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[B]], align 8
 ; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
 ; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
@@ -817,6 +873,14 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
 ; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], i32 
8, <4 x i1> [[ACTIVE_LANE_MASK]])
 ; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028
+; TF-FIXEDLEN-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; TF-FIXEDLEN: [[MIDDLE_BLOCK]]:
+; TF-FIXEDLEN-NEXT: br label %[[FOR_END:.*]]
+; TF-FIXEDLEN: [[SCALAR_PH]]:
+; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
+; TF-FIXEDLEN-NEXT: br label %[[FOR_BODY:.*]]
+; TF-FIXEDLEN: [[FOR_BODY]]:
+; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
 ; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[B]], align 8
 ; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
 ; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
@@ -974,6 +1038,14 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
 ; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
 ; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT2]]
 ; TF-SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; TF-SCALABLE-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; TF-SCALABLE: [[MIDDLE_BLOCK]]:
+; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]]
+; TF-SCALABLE: [[SCALAR_PH]]:
+; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
+; TF-SCALABLE-NEXT: br label %[[FOR_BODY:.*]]
+; TF-SCALABLE: [[FOR_BODY]]:
+; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
 ; TF-SCALABLE-NEXT: store i64 [[IV]], ptr [[B]], align 8
 ; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
 ; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
@@ -1027,6 +1099,14 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
 ; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP9]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]])
 ; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; TF-FIXEDLEN-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028
+; TF-FIXEDLEN-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; TF-FIXEDLEN: [[MIDDLE_BLOCK]]:
+; TF-FIXEDLEN-NEXT: br label %[[FOR_END:.*]]
+; TF-FIXEDLEN: [[SCALAR_PH]]:
+; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
+; TF-FIXEDLEN-NEXT: br label %[[FOR_BODY:.*]]
+; TF-FIXEDLEN: [[FOR_BODY]]:
+; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
 ; TF-FIXEDLEN-NEXT: store i64 [[IV]], ptr [[B]], align 8
 ; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
 ; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
@@ -1200,6 +1280,14 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
 ; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
 ; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
 ; TF-SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; TF-SCALABLE-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; TF-SCALABLE: [[MIDDLE_BLOCK]]:
+; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]]
+; TF-SCALABLE: [[SCALAR_PH]]:
+; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
+; TF-SCALABLE-NEXT: br label %[[FOR_BODY:.*]]
+; TF-SCALABLE: [[FOR_BODY]]:
+; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ]
 ; TF-SCALABLE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10
 ; TF-SCALABLE-NEXT: br i1 [[CMP]], label %[[DO_STORE:.*]], label %[[LATCH]]
 ; TF-SCALABLE: [[DO_STORE]]:
@@ -1238,6 +1326,14 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
 ; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028
+; TF-FIXEDLEN-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; TF-FIXEDLEN: [[MIDDLE_BLOCK]]:
+; TF-FIXEDLEN-NEXT: br label %[[FOR_END:.*]]
+; TF-FIXEDLEN: [[SCALAR_PH]]:
+; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
+; TF-FIXEDLEN-NEXT: br label %[[FOR_BODY:.*]]
+; TF-FIXEDLEN: [[FOR_BODY]]:
+; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ]
 ; TF-FIXEDLEN-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10
 ; TF-FIXEDLEN-NEXT: br i1 [[CMP]], label %[[DO_STORE:.*]], label %[[LATCH]]
 ; TF-FIXEDLEN: [[DO_STORE]]:
@@ -1381,6 +1477,14 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap
 ; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
 ; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
 ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; TF-SCALABLE-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; TF-SCALABLE: [[MIDDLE_BLOCK]]:
+; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]]
+; TF-SCALABLE: [[SCALAR_PH]]:
+; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
+; TF-SCALABLE-NEXT: br label %[[FOR_BODY:.*]]
+; TF-SCALABLE: [[FOR_BODY]]:
+; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
 ; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[B]], align 1
 ; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
 ; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
@@ -1408,6 +1512,14 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap
 ; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]])
 ; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028
+; TF-FIXEDLEN-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; TF-FIXEDLEN: [[MIDDLE_BLOCK]]:
+; TF-FIXEDLEN-NEXT: br label %[[FOR_END:.*]]
+; TF-FIXEDLEN: [[SCALAR_PH]]:
+; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
+; TF-FIXEDLEN-NEXT: br label %[[FOR_BODY:.*]]
+; TF-FIXEDLEN: [[FOR_BODY]]:
+; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
 ; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[B]], align 1
 ; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
 ; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
diff --git 
a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll index c9091e3732b14..3e734559bb917 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll @@ -52,6 +52,7 @@ define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll index 2e4af40651f7d..03783d3a6c9fb 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll @@ -29,7 +29,7 @@ for.body: ; preds = %for.body.preheader, store i32 %conv, ptr %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %2 = trunc i64 %indvars.iv.next to i32 - %cmp = icmp ult i32 %2, 1000 + %cmp = icmp ult i32 %2, %0 br i1 %cmp, label %for.body, label %for.end.loopexit for.end.loopexit: ; preds = %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll index 88fc0ba5a7060..98b849dce9666 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll @@ -96,7 +96,7 @@ attributes #0 = {"target-cpu"="haswell" "target-features"="+avx2" } ;. -; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 3} +; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 127} ; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 23} ; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]], [[META4:![0-9]+]]} ; CHECK: [[META3]] = !{!"llvm.loop.isvectorized", i32 1} From 364ee915b9f5d711d3cd9e44775049598d518635 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 30 Mar 2025 18:04:57 +0100 Subject: [PATCH 5/7] !fixup update tests after merge. 
--- .../AArch64/scalable-strict-fadd.ll | 877 +++++++++--------- .../Transforms/LoopVectorize/RISCV/pr88802.ll | 12 + .../LoopVectorize/RISCV/short-trip-count.ll | 8 +- .../truncate-to-minimal-bitwidth-cost.ll | 2 +- ...-force-tail-with-evl-reverse-load-store.ll | 3 + ...folding-optimize-vector-induction-width.ll | 36 +- 6 files changed, 487 insertions(+), 451 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll index c9e8cfe5a8a30..82f64e988e6d3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll @@ -230,54 +230,54 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UNORDERED: vector.body: ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 -; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8 -; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP10]] -; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16 -; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP13]] -; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 24 -; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP16]] -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP11]], align 4 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load , ptr [[TMP14]], align 4 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load , ptr [[TMP17]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP18]] = fadd [[WIDE_LOAD]], [[VEC_PHI]] -; CHECK-UNORDERED-NEXT: [[TMP19]] = fadd [[WIDE_LOAD4]], [[VEC_PHI1]] -; CHECK-UNORDERED-NEXT: [[TMP20]] = fadd [[WIDE_LOAD5]], [[VEC_PHI2]] -; CHECK-UNORDERED-NEXT: [[TMP21]] = fadd [[WIDE_LOAD6]], [[VEC_PHI3]] +; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi [ splat (float -0.000000e+00), 
[[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 0 +; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8 +; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP9]] +; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 16 +; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP12]] +; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 24 +; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP15]] +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP7]], align 4 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP10]], align 4 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load , ptr [[TMP13]], align 4 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load , ptr [[TMP16]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP17]] = fadd [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-UNORDERED-NEXT: [[TMP18]] = fadd [[WIDE_LOAD4]], [[VEC_PHI1]] +; CHECK-UNORDERED-NEXT: [[TMP19]] = fadd [[WIDE_LOAD5]], [[VEC_PHI2]] +; CHECK-UNORDERED-NEXT: [[TMP20]] = fadd [[WIDE_LOAD6]], [[VEC_PHI3]] ; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-UNORDERED-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-UNORDERED-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-UNORDERED-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-UNORDERED-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-UNORDERED: middle.block: -; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd [[TMP19]], [[TMP18]] -; CHECK-UNORDERED-NEXT: [[BIN_RDX7:%.*]] = fadd [[TMP20]], [[BIN_RDX]] -; CHECK-UNORDERED-NEXT: [[BIN_RDX8:%.*]] = fadd [[TMP21]], [[BIN_RDX7]] -; CHECK-UNORDERED-NEXT: [[TMP23:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, [[BIN_RDX8]]) +; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd [[TMP18]], [[TMP17]] +; CHECK-UNORDERED-NEXT: [[BIN_RDX7:%.*]] = fadd [[TMP19]], [[BIN_RDX]] +; CHECK-UNORDERED-NEXT: [[BIN_RDX8:%.*]] = fadd [[TMP20]], [[BIN_RDX7]] +; CHECK-UNORDERED-NEXT: [[TMP22:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, [[BIN_RDX8]]) ; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-UNORDERED: scalar.ph: ; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP23]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP22]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] ; CHECK-UNORDERED-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-UNORDERED: for.body: ; CHECK-UNORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-UNORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] ; CHECK-UNORDERED-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-UNORDERED-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-UNORDERED-NEXT: [[ADD]] = fadd float [[TMP24]], [[SUM_07]] +; CHECK-UNORDERED-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-UNORDERED-NEXT: [[ADD]] = fadd float [[TMP23]], [[SUM_07]] ; CHECK-UNORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-UNORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-UNORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK-UNORDERED: for.end: -; CHECK-UNORDERED-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ] +; CHECK-UNORDERED-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP22]], [[MIDDLE_BLOCK]] ] ; CHECK-UNORDERED-NEXT: ret float [[ADD_LCSSA]] ; ; CHECK-ORDERED-LABEL: define float @fadd_strict_unroll @@ -297,47 +297,47 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-ORDERED: vector.body: ; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 -; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8 -; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP10]] -; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16 -; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP13]] -; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 24 -; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP16]] -; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 -; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP11]], align 4 -; CHECK-ORDERED-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP14]], align 4 -; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP17]], align 4 -; CHECK-ORDERED-NEXT: [[TMP18:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], [[WIDE_LOAD]]) -; CHECK-ORDERED-NEXT: [[TMP19:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP18]], [[WIDE_LOAD1]]) -; CHECK-ORDERED-NEXT: [[TMP20:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP19]], [[WIDE_LOAD2]]) -; CHECK-ORDERED-NEXT: [[TMP21]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP20]], [[WIDE_LOAD3]]) +; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 0 +; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8 +; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP9]] +; CHECK-ORDERED-NEXT: 
[[TMP11:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 16 +; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP12]] +; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 24 +; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP15]] +; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP7]], align 4 +; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP10]], align 4 +; CHECK-ORDERED-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP13]], align 4 +; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP16]], align 4 +; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], [[WIDE_LOAD]]) +; CHECK-ORDERED-NEXT: [[TMP18:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP17]], [[WIDE_LOAD1]]) +; CHECK-ORDERED-NEXT: [[TMP19:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP18]], [[WIDE_LOAD2]]) +; CHECK-ORDERED-NEXT: [[TMP20]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP19]], [[WIDE_LOAD3]]) ; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-ORDERED-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-ORDERED-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-ORDERED-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-ORDERED-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-ORDERED: middle.block: ; CHECK-ORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-ORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-ORDERED: scalar.ph: ; CHECK-ORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP21]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP20]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] ; CHECK-ORDERED-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED: for.body: ; CHECK-ORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-ORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] ; CHECK-ORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-ORDERED-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-ORDERED-NEXT: [[ADD]] = fadd float [[TMP23]], [[SUM_07]] +; CHECK-ORDERED-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-ORDERED-NEXT: [[ADD]] = fadd float [[TMP22]], [[SUM_07]] ; CHECK-ORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-ORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-ORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK-ORDERED: for.end: -; CHECK-ORDERED-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-NEXT: ret float [[ADD_LCSSA]] ; ; CHECK-ORDERED-TF-LABEL: define float @fadd_strict_unroll @@ -378,47 +378,47 @@ define float @fadd_strict_unroll(ptr 
noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK6:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY3]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT12:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK7:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY4]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT13:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT14:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP35:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0 -; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 8 -; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP20]] -; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 16 -; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP23]] -; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 24 -; CHECK-ORDERED-TF-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP26]] -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP18]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP21]], i32 4, [[ACTIVE_LANE_MASK6]], poison) -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP24]], i32 4, [[ACTIVE_LANE_MASK7]], poison) -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP27]], i32 4, [[ACTIVE_LANE_MASK8]], poison) -; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = select [[ACTIVE_LANE_MASK]], [[WIDE_MASKED_LOAD]], splat (float -0.000000e+00) -; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], [[TMP28]]) -; CHECK-ORDERED-TF-NEXT: [[TMP30:%.*]] = select [[ACTIVE_LANE_MASK6]], [[WIDE_MASKED_LOAD9]], splat (float -0.000000e+00) -; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP29]], [[TMP30]]) -; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = select [[ACTIVE_LANE_MASK7]], [[WIDE_MASKED_LOAD10]], splat (float -0.000000e+00) -; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP31]], [[TMP32]]) -; CHECK-ORDERED-TF-NEXT: [[TMP34:%.*]] = select [[ACTIVE_LANE_MASK8]], [[WIDE_MASKED_LOAD11]], splat (float -0.000000e+00) -; CHECK-ORDERED-TF-NEXT: [[TMP35]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP33]], [[TMP34]]) +; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP34:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i32 0 +; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 8 +; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = getelementptr inbounds 
float, ptr [[TMP16]], i64 [[TMP19]] +; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 16 +; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP22]] +; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = mul i64 [[TMP24]], 24 +; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP25]] +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP17]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP20]], i32 4, [[ACTIVE_LANE_MASK6]], poison) +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP23]], i32 4, [[ACTIVE_LANE_MASK7]], poison) +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP26]], i32 4, [[ACTIVE_LANE_MASK8]], poison) +; CHECK-ORDERED-TF-NEXT: [[TMP27:%.*]] = select [[ACTIVE_LANE_MASK]], [[WIDE_MASKED_LOAD]], splat (float -0.000000e+00) +; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], [[TMP27]]) +; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = select [[ACTIVE_LANE_MASK6]], [[WIDE_MASKED_LOAD9]], splat (float -0.000000e+00) +; CHECK-ORDERED-TF-NEXT: [[TMP30:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP28]], [[TMP29]]) +; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = select [[ACTIVE_LANE_MASK7]], [[WIDE_MASKED_LOAD10]], splat (float -0.000000e+00) +; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP30]], [[TMP31]]) +; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = select [[ACTIVE_LANE_MASK8]], [[WIDE_MASKED_LOAD11]], splat (float -0.000000e+00) +; CHECK-ORDERED-TF-NEXT: [[TMP34]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP32]], [[TMP33]]) ; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] -; CHECK-ORDERED-TF-NEXT: [[TMP36:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP37:%.*]] = mul i64 [[TMP36]], 8 -; CHECK-ORDERED-TF-NEXT: [[TMP38:%.*]] = add i64 [[INDEX]], [[TMP37]] -; CHECK-ORDERED-TF-NEXT: [[TMP39:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = mul i64 [[TMP39]], 16 -; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = add i64 [[INDEX]], [[TMP40]] -; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = mul i64 [[TMP42]], 24 -; CHECK-ORDERED-TF-NEXT: [[TMP44:%.*]] = add i64 [[INDEX]], [[TMP43]] +; CHECK-ORDERED-TF-NEXT: [[TMP35:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP36:%.*]] = mul i64 [[TMP35]], 8 +; CHECK-ORDERED-TF-NEXT: [[TMP37:%.*]] = add i64 [[INDEX]], [[TMP36]] +; CHECK-ORDERED-TF-NEXT: [[TMP38:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP39:%.*]] = mul i64 [[TMP38]], 16 +; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = add i64 [[INDEX]], [[TMP39]] +; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = mul i64 [[TMP41]], 24 +; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = add i64 [[INDEX]], [[TMP42]] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP9]]) -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT12]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP38]], 
i64 [[TMP9]]) -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT13]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP41]], i64 [[TMP9]]) -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT14]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP44]], i64 [[TMP9]]) -; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = extractelement [[TMP45]], i32 0 -; CHECK-ORDERED-TF-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT12]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP37]], i64 [[TMP9]]) +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT13]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP40]], i64 [[TMP9]]) +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT14]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP43]], i64 [[TMP9]]) +; CHECK-ORDERED-TF-NEXT: [[TMP44:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = extractelement [[TMP44]], i32 0 +; CHECK-ORDERED-TF-NEXT: br i1 [[TMP45]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]] ; CHECK-ORDERED-TF: scalar.ph: @@ -429,13 +429,13 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-ORDERED-TF-NEXT: [[TMP47:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-ORDERED-TF-NEXT: [[ADD]] = fadd float [[TMP47]], [[SUM_07]] +; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-ORDERED-TF-NEXT: [[ADD]] = fadd float [[TMP46]], [[SUM_07]] ; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK-ORDERED-TF: for.end: -; CHECK-ORDERED-TF-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP35]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-TF-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP34]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-TF-NEXT: ret float [[ADD_LCSSA]] ; @@ -653,35 +653,48 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-ORDERED-TF: vector.body: ; CHECK-ORDERED-TF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ [[A2]], [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[VEC_PHI1:%.*]] = phi float [ [[A1]], [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ [[A2]], [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-TF-NEXT: [[VEC_PHI1:%.*]] = phi float [ [[A1]], [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: 
[[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]] +; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-ORDERED-TF-NEXT: [[INTERLEAVED_MASK:%.*]] = call @llvm.vector.interleave2.nxv8i1( [[ACTIVE_LANE_MASK]], [[ACTIVE_LANE_MASK]]) -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_VEC:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP14]], i32 4, [[INTERLEAVED_MASK]], poison) +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_VEC:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP13]], i32 4, [[INTERLEAVED_MASK]], poison) ; CHECK-ORDERED-TF-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.vector.deinterleave2.nxv8f32( [[WIDE_MASKED_VEC]]) -; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 -; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 -; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP16]], splat (float -0.000000e+00) -; CHECK-ORDERED-TF-NEXT: [[TMP18]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP17]]) -; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP15]], splat (float -0.000000e+00) -; CHECK-ORDERED-TF-NEXT: [[TMP20]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI1]], [[TMP19]]) +; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 +; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 +; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP15]], splat (float -0.000000e+00) +; CHECK-ORDERED-TF-NEXT: [[TMP17]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP16]]) +; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP14]], splat (float -0.000000e+00) +; CHECK-ORDERED-TF-NEXT: [[TMP19]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI1]], [[TMP18]]) ; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP12]]) -; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = extractelement [[TMP21]], i32 0 -; CHECK-ORDERED-TF-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = extractelement [[TMP20]], i32 0 +; CHECK-ORDERED-TF-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]] ; CHECK-ORDERED-TF: scalar.ph: -; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDXB2]], align 4 -; CHECK-ORDERED-TF-NEXT: [[ADD2]] = fadd float [[TMP24]], [[ADD_PHI1]] +; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[A2]], [[ENTRY:%.*]] ] +; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX2:%.*]] = phi float [ [[A1]], [[ENTRY]] ] +; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY]] ] +; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-ORDERED-TF: for.body: +; CHECK-ORDERED-TF-NEXT: [[ADD_PHI1:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD2:%.*]], [[FOR_BODY]] ] +; CHECK-ORDERED-TF-NEXT: [[ADD_PHI2:%.*]] = phi float [ [[BC_MERGE_RDX2]], [[SCALAR_PH]] ], 
[ [[ADD1:%.*]], [[FOR_BODY]] ] +; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-ORDERED-TF-NEXT: [[ARRAYIDXB1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]] +; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDXB1]], align 4 +; CHECK-ORDERED-TF-NEXT: [[ADD1]] = fadd float [[TMP22]], [[ADD_PHI2]] +; CHECK-ORDERED-TF-NEXT: [[OR:%.*]] = or disjoint i64 [[IV]], 1 +; CHECK-ORDERED-TF-NEXT: [[ARRAYIDXB2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[OR]] +; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDXB2]], align 4 +; CHECK-ORDERED-TF-NEXT: [[ADD2]] = fadd float [[TMP23]], [[ADD_PHI1]] ; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 2 ; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK-ORDERED-TF: for.end: -; CHECK-ORDERED-TF-NEXT: [[ADD1_LCSSA:%.*]] = phi float [ [[ADD1]], [[FOR_BODY]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ] -; CHECK-ORDERED-TF-NEXT: [[ADD2_LCSSA:%.*]] = phi float [ [[ADD2]], [[FOR_BODY]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-TF-NEXT: [[ADD1_LCSSA:%.*]] = phi float [ [[ADD1]], [[FOR_BODY]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-TF-NEXT: [[ADD2_LCSSA:%.*]] = phi float [ [[ADD2]], [[FOR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-TF-NEXT: store float [[ADD1_LCSSA]], ptr [[A]], align 4 ; CHECK-ORDERED-TF-NEXT: store float [[ADD2_LCSSA]], ptr [[ARRAYIDXA]], align 4 ; CHECK-ORDERED-TF-NEXT: ret void @@ -913,6 +926,8 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END_LOOPEXIT:%.*]] ; CHECK-ORDERED-TF: scalar.ph: +; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_PREHEADER]] ] ; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED-TF: for.body: ; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -1147,6 +1162,8 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]] ; CHECK-ORDERED-TF: scalar.ph: +; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ] ; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED-TF: for.body: ; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] @@ -1379,71 +1396,71 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UNORDERED: vector.body: ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ 
-; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP32:%.*]], [[VECTOR_BODY]] ]
-; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0
-; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8
-; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP10]]
-; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16
-; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP13]]
-; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 24
-; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP16]]
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP8]], align 4
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP11]], align 4
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP14]], align 4
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP17]], align 4
-; CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 0
-; CHECK-UNORDERED-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 8
-; CHECK-UNORDERED-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP21]]
-; CHECK-UNORDERED-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 16
-; CHECK-UNORDERED-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP24]]
-; CHECK-UNORDERED-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP27:%.*]] = mul i64 [[TMP26]], 24
-; CHECK-UNORDERED-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP27]]
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, ptr [[TMP19]], align 4
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 8 x float>, ptr [[TMP22]], align 4
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD9:%.*]] = load <vscale x 8 x float>, ptr [[TMP25]], align 4
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD10:%.*]] = load <vscale x 8 x float>, ptr [[TMP28]], align 4
-; CHECK-UNORDERED-NEXT: [[TMP29]] = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD]], <vscale x 8 x float> [[WIDE_LOAD7]], <vscale x 8 x float> [[VEC_PHI]])
-; CHECK-UNORDERED-NEXT: [[TMP30]] = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD4]], <vscale x 8 x float> [[WIDE_LOAD8]], <vscale x 8 x float> [[VEC_PHI1]])
-; CHECK-UNORDERED-NEXT: [[TMP31]] = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD5]], <vscale x 8 x float> [[WIDE_LOAD9]], <vscale x 8 x float> [[VEC_PHI2]])
-; CHECK-UNORDERED-NEXT: [[TMP32]] = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD6]], <vscale x 8 x float> [[WIDE_LOAD10]], <vscale x 8 x float> [[VEC_PHI3]])
+; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[VECTOR_BODY]] ]
+; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[VECTOR_BODY]] ]
+; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[VECTOR_BODY]] ]
+; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP31:%.*]], [[VECTOR_BODY]] ]
+; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 0
+; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8
+; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP9]]
+; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 16
+; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP12]]
+; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 24
+; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP15]]
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP7]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP10]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP13]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP16]], align 4
+; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
+; CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
+; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-UNORDERED-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 8
+; CHECK-UNORDERED-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP20]]
+; CHECK-UNORDERED-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-UNORDERED-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 16
+; CHECK-UNORDERED-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP23]]
+; CHECK-UNORDERED-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-UNORDERED-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 24
+; CHECK-UNORDERED-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP26]]
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, ptr [[TMP18]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 8 x float>, ptr [[TMP21]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD9:%.*]] = load <vscale x 8 x float>, ptr [[TMP24]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD10:%.*]] = load <vscale x 8 x float>, ptr [[TMP27]], align 4
+; CHECK-UNORDERED-NEXT: [[TMP28]] = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD]], <vscale x 8 x float> [[WIDE_LOAD7]], <vscale x 8 x float> [[VEC_PHI]])
+; CHECK-UNORDERED-NEXT: [[TMP29]] = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD4]], <vscale x 8 x float> [[WIDE_LOAD8]], <vscale x 8 x float> [[VEC_PHI1]])
+; CHECK-UNORDERED-NEXT: [[TMP30]] = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD5]], <vscale x 8 x float> [[WIDE_LOAD9]], <vscale x 8 x float> [[VEC_PHI2]])
+; CHECK-UNORDERED-NEXT: [[TMP31]] = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD6]], <vscale x 8 x float> [[WIDE_LOAD10]], <vscale x 8 x float> [[VEC_PHI3]])
; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; CHECK-UNORDERED-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-UNORDERED-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK-UNORDERED-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-UNORDERED-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK-UNORDERED: middle.block:
-; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd <vscale x 8 x float> [[TMP30]], [[TMP29]]
-; CHECK-UNORDERED-NEXT: [[BIN_RDX11:%.*]] = fadd <vscale x 8 x float> [[TMP31]], [[BIN_RDX]]
-; CHECK-UNORDERED-NEXT: [[BIN_RDX12:%.*]] = fadd <vscale x 8 x float> [[TMP32]], [[BIN_RDX11]]
-; CHECK-UNORDERED-NEXT: [[TMP34:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, <vscale x 8 x float> [[BIN_RDX12]])
+; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd <vscale x 8 x float> [[TMP29]], [[TMP28]]
+; CHECK-UNORDERED-NEXT: [[BIN_RDX11:%.*]] = fadd <vscale x 8 x float> [[TMP30]], [[BIN_RDX]]
+; CHECK-UNORDERED-NEXT: [[BIN_RDX12:%.*]] = fadd <vscale x 8 x float> [[TMP31]], [[BIN_RDX11]]
+; CHECK-UNORDERED-NEXT: [[TMP33:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, <vscale x 8 x float> [[BIN_RDX12]])
; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK-UNORDERED: scalar.ph:
; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP34]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
+; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP33]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
; CHECK-UNORDERED-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-UNORDERED: for.body:
; CHECK-UNORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
-; CHECK-UNORDERED-NEXT: [[TMP35:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-UNORDERED-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-UNORDERED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
-; CHECK-UNORDERED-NEXT: [[TMP36:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
-; CHECK-UNORDERED-NEXT: [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP35]], float [[TMP36]], float [[SUM_07]])
+; CHECK-UNORDERED-NEXT: [[TMP35:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; CHECK-UNORDERED-NEXT: [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP34]], float [[TMP35]], float [[SUM_07]])
; CHECK-UNORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-UNORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-UNORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK-UNORDERED: for.end:
-; CHECK-UNORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP34]], [[MIDDLE_BLOCK]] ]
+; CHECK-UNORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ]
; CHECK-UNORDERED-NEXT: ret float [[MULADD_LCSSA]]
;
; CHECK-ORDERED-LABEL: define float @fmuladd_strict
@@ -1463,68 +1480,68 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-ORDERED: vector.body:
; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP36:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0
-; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8
-; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP10]]
-; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16
-; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP13]]
-; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 24
-; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP16]]
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP8]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x float>, ptr [[TMP11]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x float>, ptr [[TMP14]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x float>, ptr [[TMP17]], align 4
-; CHECK-ORDERED-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-ORDERED-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 0
-; CHECK-ORDERED-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 8
-; CHECK-ORDERED-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP21]]
-; CHECK-ORDERED-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 16
-; CHECK-ORDERED-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP24]]
-; CHECK-ORDERED-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP27:%.*]] = mul i64 [[TMP26]], 24
-; CHECK-ORDERED-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP27]]
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP19]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP22]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP25]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, ptr [[TMP28]], align 4
-; CHECK-ORDERED-NEXT: [[TMP29:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD]], [[WIDE_LOAD4]]
-; CHECK-ORDERED-NEXT: [[TMP30:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD1]], [[WIDE_LOAD5]]
-; CHECK-ORDERED-NEXT: [[TMP31:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD2]], [[WIDE_LOAD6]]
-; CHECK-ORDERED-NEXT: [[TMP32:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD3]], [[WIDE_LOAD7]]
-; CHECK-ORDERED-NEXT: [[TMP33:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP29]])
+; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP35:%.*]], [[VECTOR_BODY]] ]
+; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 0
+; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8
+; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP9]]
+; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 16
+; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP12]]
+; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 24
+; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP15]]
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP7]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x float>, ptr [[TMP10]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x float>, ptr [[TMP13]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x float>, ptr [[TMP16]], align 4
+; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
+; CHECK-ORDERED-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
+; CHECK-ORDERED-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 8
+; CHECK-ORDERED-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP20]]
+; CHECK-ORDERED-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 16
+; CHECK-ORDERED-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP23]]
+; CHECK-ORDERED-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 24
+; CHECK-ORDERED-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP26]]
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP18]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP21]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP24]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, ptr [[TMP27]], align 4
+; CHECK-ORDERED-NEXT: [[TMP28:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD]], [[WIDE_LOAD4]]
+; CHECK-ORDERED-NEXT: [[TMP29:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD1]], [[WIDE_LOAD5]]
+; CHECK-ORDERED-NEXT: [[TMP30:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD2]], [[WIDE_LOAD6]]
+; CHECK-ORDERED-NEXT: [[TMP31:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD3]], [[WIDE_LOAD7]]
+; CHECK-ORDERED-NEXT: [[TMP32:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP28]])
+; CHECK-ORDERED-NEXT: [[TMP33:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP32]], <vscale x 8 x float> [[TMP29]])
; CHECK-ORDERED-NEXT: [[TMP34:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP33]], <vscale x 8 x float> [[TMP30]])
-; CHECK-ORDERED-NEXT: [[TMP35:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP34]], <vscale x 8 x float> [[TMP31]])
-; CHECK-ORDERED-NEXT: [[TMP36]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP35]], <vscale x 8 x float> [[TMP32]])
+; CHECK-ORDERED-NEXT: [[TMP35]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP34]], <vscale x 8 x float> [[TMP31]])
; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; CHECK-ORDERED-NEXT: [[TMP37:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-ORDERED-NEXT: br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK-ORDERED-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-ORDERED-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK-ORDERED: middle.block:
; CHECK-ORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-ORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK-ORDERED: scalar.ph:
; CHECK-ORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP36]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
+; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP35]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
; CHECK-ORDERED-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-ORDERED: for.body:
; CHECK-ORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
-; CHECK-ORDERED-NEXT: [[TMP38:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-ORDERED-NEXT: [[TMP37:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-ORDERED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
-; CHECK-ORDERED-NEXT: [[TMP39:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
-; CHECK-ORDERED-NEXT: [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP38]], float [[TMP39]], float [[SUM_07]])
+; CHECK-ORDERED-NEXT: [[TMP38:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; CHECK-ORDERED-NEXT: [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP37]], float [[TMP38]], float [[SUM_07]])
; CHECK-ORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-ORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-ORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK-ORDERED: for.end:
-; CHECK-ORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP36]], [[MIDDLE_BLOCK]] ]
+; CHECK-ORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP35]], [[MIDDLE_BLOCK]] ]
; CHECK-ORDERED-NEXT: ret float [[MULADD_LCSSA]]
;
; CHECK-ORDERED-TF-LABEL: define float @fmuladd_strict
@@ -1565,66 +1582,66 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK6:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY3]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT16:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK7:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY4]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT17:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT18:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP50:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
-; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 8
-; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP20]]
-; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 16
-; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP23]]
-; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 24
-; CHECK-ORDERED-TF-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP26]]
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP18]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP21]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP24]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP27]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i32 0
-; CHECK-ORDERED-TF-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = mul i64 [[TMP30]], 8
-; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[TMP31]]
-; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP34:%.*]] = mul i64 [[TMP33]], 16
-; CHECK-ORDERED-TF-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[TMP34]]
-; CHECK-ORDERED-TF-NEXT: [[TMP36:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP37:%.*]] = mul i64 [[TMP36]], 24
-; CHECK-ORDERED-TF-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[TMP37]]
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP29]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP32]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP35]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP38]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[TMP39:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD12]]
-; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD13]]
-; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD10]], [[WIDE_MASKED_LOAD14]]
-; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD11]], [[WIDE_MASKED_LOAD15]]
-; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[TMP39]], <vscale x 8 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP44:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP43]])
-; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> [[TMP40]], <vscale x 8 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP44]], <vscale x 8 x float> [[TMP45]])
-; CHECK-ORDERED-TF-NEXT: [[TMP47:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> [[TMP41]], <vscale x 8 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP48:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP46]], <vscale x 8 x float> [[TMP47]])
-; CHECK-ORDERED-TF-NEXT: [[TMP49:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> [[TMP42]], <vscale x 8 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP50]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP48]], <vscale x 8 x float> [[TMP49]])
+; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP49:%.*]], [[VECTOR_BODY]] ]
+; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i32 0
+; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 8
+; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP19]]
+; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 16
+; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP22]]
+; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = mul i64 [[TMP24]], 24
+; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP25]]
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP17]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP20]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP23]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP26]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
+; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i32 0
+; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP30:%.*]] = mul i64 [[TMP29]], 8
+; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i64 [[TMP30]]
+; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = mul i64 [[TMP32]], 16
+; CHECK-ORDERED-TF-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i64 [[TMP33]]
+; CHECK-ORDERED-TF-NEXT: [[TMP35:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP36:%.*]] = mul i64 [[TMP35]], 24
+; CHECK-ORDERED-TF-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i64 [[TMP36]]
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP28]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP31]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP34]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP37]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[TMP38:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD12]]
+; CHECK-ORDERED-TF-NEXT: [[TMP39:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD13]]
+; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD10]], [[WIDE_MASKED_LOAD14]]
+; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD11]], [[WIDE_MASKED_LOAD15]]
+; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[TMP38]], <vscale x 8 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP42]])
+; CHECK-ORDERED-TF-NEXT: [[TMP44:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> [[TMP39]], <vscale x 8 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP43]], <vscale x 8 x float> [[TMP44]])
+; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> [[TMP40]], <vscale x 8 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP47:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP45]], <vscale x 8 x float> [[TMP46]])
+; CHECK-ORDERED-TF-NEXT: [[TMP48:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> [[TMP41]], <vscale x 8 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP49]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP47]], <vscale x 8 x float> [[TMP48]])
; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
-; CHECK-ORDERED-TF-NEXT: [[TMP51:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP52:%.*]] = mul i64 [[TMP51]], 8
-; CHECK-ORDERED-TF-NEXT: [[TMP53:%.*]] = add i64 [[INDEX]], [[TMP52]]
-; CHECK-ORDERED-TF-NEXT: [[TMP54:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP55:%.*]] = mul i64 [[TMP54]], 16
-; CHECK-ORDERED-TF-NEXT: [[TMP56:%.*]] = add i64 [[INDEX]], [[TMP55]]
-; CHECK-ORDERED-TF-NEXT: [[TMP57:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP58:%.*]] = mul i64 [[TMP57]], 24
-; CHECK-ORDERED-TF-NEXT: [[TMP59:%.*]] = add i64 [[INDEX]], [[TMP58]]
+; CHECK-ORDERED-TF-NEXT: [[TMP50:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP51:%.*]] = mul i64 [[TMP50]], 8
+; CHECK-ORDERED-TF-NEXT: [[TMP52:%.*]] = add i64 [[INDEX]], [[TMP51]]
+; CHECK-ORDERED-TF-NEXT: [[TMP53:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP54:%.*]] = mul i64 [[TMP53]], 16
+; CHECK-ORDERED-TF-NEXT: [[TMP55:%.*]] = add i64 [[INDEX]], [[TMP54]]
+; CHECK-ORDERED-TF-NEXT: [[TMP56:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP57:%.*]] = mul i64 [[TMP56]], 24
+; CHECK-ORDERED-TF-NEXT: [[TMP58:%.*]] = add i64 [[INDEX]], [[TMP57]]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP9]])
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP53]], i64 [[TMP9]])
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT17]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP56]], i64 [[TMP9]])
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT18]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP59]], i64 [[TMP9]])
-; CHECK-ORDERED-TF-NEXT: [[TMP60:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
-; CHECK-ORDERED-TF-NEXT: [[TMP61:%.*]] = extractelement <vscale x 8 x i1> [[TMP60]], i32 0
-; CHECK-ORDERED-TF-NEXT: br i1 [[TMP61]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP52]], i64 [[TMP9]])
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT17]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP55]], i64 [[TMP9]])
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT18]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP58]], i64 [[TMP9]])
+; CHECK-ORDERED-TF-NEXT: [[TMP59:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
+; CHECK-ORDERED-TF-NEXT: [[TMP60:%.*]] = extractelement <vscale x 8 x i1> [[TMP59]], i32 0
+; CHECK-ORDERED-TF-NEXT: br i1 [[TMP60]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK-ORDERED-TF: middle.block:
; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]]
; CHECK-ORDERED-TF: scalar.ph:
@@ -1635,15 +1652,15 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
-; CHECK-ORDERED-TF-NEXT: [[TMP62:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-ORDERED-TF-NEXT: [[TMP61:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
-; CHECK-ORDERED-TF-NEXT: [[TMP63:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
-; CHECK-ORDERED-TF-NEXT: [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP62]], float [[TMP63]], float [[SUM_07]])
+; CHECK-ORDERED-TF-NEXT: [[TMP62:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; CHECK-ORDERED-TF-NEXT: [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP61]], float [[TMP62]], float [[SUM_07]])
; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK-ORDERED-TF: for.end:
-; CHECK-ORDERED-TF-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP50]], [[MIDDLE_BLOCK]] ]
+; CHECK-ORDERED-TF-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP49]], [[MIDDLE_BLOCK]] ]
; CHECK-ORDERED-TF-NEXT: ret float [[MULADD_LCSSA]]
;
@@ -1707,71 +1724,71 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-UNORDERED: vector.body:
; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[VECTOR_BODY]] ]
-; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[VECTOR_BODY]] ]
-; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP31:%.*]], [[VECTOR_BODY]] ]
-; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP32:%.*]], [[VECTOR_BODY]] ]
-; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0
-; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8
-; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP10]]
-; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16
-; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP13]]
-; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 24
-; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP16]]
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP8]], align 4
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP11]], align 4
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP14]], align 4
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP17]], align 4
-; CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 0
-; CHECK-UNORDERED-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 8
-; CHECK-UNORDERED-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP21]]
-; CHECK-UNORDERED-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 16
-; CHECK-UNORDERED-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP24]]
-; CHECK-UNORDERED-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP27:%.*]] = mul i64 [[TMP26]], 24
-; CHECK-UNORDERED-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP27]]
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, ptr [[TMP19]], align 4
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 8 x float>, ptr [[TMP22]], align 4
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD9:%.*]] = load <vscale x 8 x float>, ptr [[TMP25]], align 4
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD10:%.*]] = load <vscale x 8 x float>, ptr [[TMP28]], align 4
-; CHECK-UNORDERED-NEXT: [[TMP29]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD]], <vscale x 8 x float> [[WIDE_LOAD7]], <vscale x 8 x float> [[VEC_PHI]])
-; CHECK-UNORDERED-NEXT: [[TMP30]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD4]], <vscale x 8 x float> [[WIDE_LOAD8]], <vscale x 8 x float> [[VEC_PHI1]])
-; CHECK-UNORDERED-NEXT: [[TMP31]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD5]], <vscale x 8 x float> [[WIDE_LOAD9]], <vscale x 8 x float> [[VEC_PHI2]])
-; CHECK-UNORDERED-NEXT: [[TMP32]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD6]], <vscale x 8 x float> [[WIDE_LOAD10]], <vscale x 8 x float> [[VEC_PHI3]])
+; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[VECTOR_BODY]] ]
+; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[VECTOR_BODY]] ]
+; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[VECTOR_BODY]] ]
+; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP31:%.*]], [[VECTOR_BODY]] ]
+; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 0
+; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8
+; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP9]]
+; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 16
+; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP12]]
+; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 24
+; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP15]]
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP7]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP10]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP13]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP16]], align 4
+; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
+; CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
+; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-UNORDERED-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 8
+; CHECK-UNORDERED-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP20]]
+; CHECK-UNORDERED-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-UNORDERED-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 16
+; CHECK-UNORDERED-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP23]]
+; CHECK-UNORDERED-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-UNORDERED-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 24
+; CHECK-UNORDERED-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP26]]
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, ptr [[TMP18]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 8 x float>, ptr [[TMP21]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD9:%.*]] = load <vscale x 8 x float>, ptr [[TMP24]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD10:%.*]] = load <vscale x 8 x float>, ptr [[TMP27]], align 4
+; CHECK-UNORDERED-NEXT: [[TMP28]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD]], <vscale x 8 x float> [[WIDE_LOAD7]], <vscale x 8 x float> [[VEC_PHI]])
+; CHECK-UNORDERED-NEXT: [[TMP29]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD4]], <vscale x 8 x float> [[WIDE_LOAD8]], <vscale x 8 x float> [[VEC_PHI1]])
+; CHECK-UNORDERED-NEXT: [[TMP30]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD5]], <vscale x 8 x float> [[WIDE_LOAD9]], <vscale x 8 x float> [[VEC_PHI2]])
+; CHECK-UNORDERED-NEXT: [[TMP31]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD6]], <vscale x 8 x float> [[WIDE_LOAD10]], <vscale x 8 x float> [[VEC_PHI3]])
; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; CHECK-UNORDERED-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-UNORDERED-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK-UNORDERED-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-UNORDERED-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK-UNORDERED: middle.block:
-; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd nnan <vscale x 8 x float> [[TMP30]], [[TMP29]]
-; CHECK-UNORDERED-NEXT: [[BIN_RDX11:%.*]] = fadd nnan <vscale x 8 x float> [[TMP31]], [[BIN_RDX]]
-; CHECK-UNORDERED-NEXT: [[BIN_RDX12:%.*]] = fadd nnan <vscale x 8 x float> [[TMP32]], [[BIN_RDX11]]
-; CHECK-UNORDERED-NEXT: [[TMP34:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, <vscale x 8 x float> [[BIN_RDX12]])
+; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd nnan <vscale x 8 x float> [[TMP29]], [[TMP28]]
+; CHECK-UNORDERED-NEXT: [[BIN_RDX11:%.*]] = fadd nnan <vscale x 8 x float> [[TMP30]], [[BIN_RDX]]
+; CHECK-UNORDERED-NEXT: [[BIN_RDX12:%.*]] = fadd nnan <vscale x 8 x float> [[TMP31]], [[BIN_RDX11]]
+; CHECK-UNORDERED-NEXT: [[TMP33:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, <vscale x 8 x float> [[BIN_RDX12]])
; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK-UNORDERED: scalar.ph:
; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP34]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
+; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP33]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
; CHECK-UNORDERED-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-UNORDERED: for.body:
; CHECK-UNORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
-; CHECK-UNORDERED-NEXT: [[TMP35:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-UNORDERED-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-UNORDERED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
-; CHECK-UNORDERED-NEXT: [[TMP36:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
-; CHECK-UNORDERED-NEXT: [[MULADD]] = tail call nnan float @llvm.fmuladd.f32(float [[TMP35]], float [[TMP36]], float [[SUM_07]])
+; CHECK-UNORDERED-NEXT: [[TMP35:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; CHECK-UNORDERED-NEXT: [[MULADD]] = tail call nnan float @llvm.fmuladd.f32(float [[TMP34]], float [[TMP35]], float [[SUM_07]])
; CHECK-UNORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-UNORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-UNORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK-UNORDERED: for.end:
-; CHECK-UNORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP34]], [[MIDDLE_BLOCK]] ]
+; CHECK-UNORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ]
; CHECK-UNORDERED-NEXT: ret float [[MULADD_LCSSA]]
;
; CHECK-ORDERED-LABEL: define float @fmuladd_strict_fmf
@@ -1791,68 +1808,68 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-ORDERED: vector.body:
; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP36:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0
-; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8
-; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP10]]
-; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16
-; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP13]]
-; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 24
-; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP16]]
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP8]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x float>, ptr [[TMP11]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x float>, ptr [[TMP14]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x float>, ptr [[TMP17]], align 4
-; CHECK-ORDERED-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-ORDERED-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 0
-; CHECK-ORDERED-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 8
-; CHECK-ORDERED-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP21]]
-; CHECK-ORDERED-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 16
-; CHECK-ORDERED-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP24]]
-; CHECK-ORDERED-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP27:%.*]] = mul i64 [[TMP26]], 24
-; CHECK-ORDERED-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP27]]
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP19]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP22]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP25]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, ptr [[TMP28]], align 4
-; CHECK-ORDERED-NEXT: [[TMP29:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD]], [[WIDE_LOAD4]]
-; CHECK-ORDERED-NEXT: [[TMP30:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD1]], [[WIDE_LOAD5]]
-; CHECK-ORDERED-NEXT: [[TMP31:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD2]], [[WIDE_LOAD6]]
-; CHECK-ORDERED-NEXT: [[TMP32:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD3]], [[WIDE_LOAD7]]
-; CHECK-ORDERED-NEXT: [[TMP33:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP29]])
+; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP35:%.*]], [[VECTOR_BODY]] ]
+; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 0
+; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8
+; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP9]]
+; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 16
+; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP12]]
+; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 24
+; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP15]]
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP7]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x float>, ptr [[TMP10]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x float>, ptr [[TMP13]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x float>, ptr [[TMP16]], align 4
+; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
+; CHECK-ORDERED-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
+; CHECK-ORDERED-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 8
+; CHECK-ORDERED-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP20]]
+; CHECK-ORDERED-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 16
+; CHECK-ORDERED-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP23]]
+; CHECK-ORDERED-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 24
+; CHECK-ORDERED-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP26]]
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP18]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP21]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP24]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, ptr [[TMP27]], align 4
+; CHECK-ORDERED-NEXT: [[TMP28:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD]], [[WIDE_LOAD4]]
+; CHECK-ORDERED-NEXT: [[TMP29:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD1]], [[WIDE_LOAD5]]
+; CHECK-ORDERED-NEXT: [[TMP30:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD2]], [[WIDE_LOAD6]]
+; CHECK-ORDERED-NEXT: [[TMP31:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD3]], [[WIDE_LOAD7]]
+; CHECK-ORDERED-NEXT: [[TMP32:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP28]])
+; CHECK-ORDERED-NEXT: [[TMP33:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP32]], <vscale x 8 x float> [[TMP29]])
; CHECK-ORDERED-NEXT: [[TMP34:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP33]], <vscale x 8 x float> [[TMP30]])
-; CHECK-ORDERED-NEXT: [[TMP35:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP34]], <vscale x 8 x float> [[TMP31]])
-; CHECK-ORDERED-NEXT: [[TMP36]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP35]], <vscale x 8 x float> [[TMP32]])
+; CHECK-ORDERED-NEXT: [[TMP35]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP34]], <vscale x 8 x float> [[TMP31]])
; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; CHECK-ORDERED-NEXT: [[TMP37:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-ORDERED-NEXT: br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
+; CHECK-ORDERED-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-ORDERED-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK-ORDERED: middle.block:
; CHECK-ORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-ORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK-ORDERED: scalar.ph:
; CHECK-ORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP36]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
+; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP35]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
; CHECK-ORDERED-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-ORDERED: for.body:
; CHECK-ORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
-; CHECK-ORDERED-NEXT: [[TMP38:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-ORDERED-NEXT: [[TMP37:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-ORDERED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
-; CHECK-ORDERED-NEXT: [[TMP39:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
-; CHECK-ORDERED-NEXT: [[MULADD]] = tail call nnan float @llvm.fmuladd.f32(float [[TMP38]], float [[TMP39]], float [[SUM_07]])
+; CHECK-ORDERED-NEXT: [[TMP38:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; CHECK-ORDERED-NEXT: [[MULADD]] = tail call nnan float @llvm.fmuladd.f32(float [[TMP37]], float [[TMP38]], float [[SUM_07]])
; CHECK-ORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-ORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-ORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK-ORDERED: for.end:
-; CHECK-ORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP36]], [[MIDDLE_BLOCK]] ]
+; CHECK-ORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP35]], [[MIDDLE_BLOCK]] ]
; CHECK-ORDERED-NEXT: ret float [[MULADD_LCSSA]]
;
; CHECK-ORDERED-TF-LABEL: define float @fmuladd_strict_fmf
@@ -1893,66 +1910,66 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK6:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY3]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT16:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK7:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY4]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT17:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT18:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP50:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
-; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 8
-; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP20]]
-; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 16
-; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP23]]
-; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 24
-; CHECK-ORDERED-TF-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP26]]
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP18]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP21]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP24]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP27]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i32 0
-; CHECK-ORDERED-TF-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = mul i64 [[TMP30]], 8
-; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[TMP31]]
-; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP34:%.*]] = mul i64 [[TMP33]], 16
-; CHECK-ORDERED-TF-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[TMP34]]
-; CHECK-ORDERED-TF-NEXT: [[TMP36:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP37:%.*]] = mul i64 [[TMP36]], 24
-; CHECK-ORDERED-TF-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[TMP37]]
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP29]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP32]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP35]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP38]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[TMP39:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD12]]
-; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD13]]
-; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD10]], [[WIDE_MASKED_LOAD14]]
-; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD11]], [[WIDE_MASKED_LOAD15]]
-; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[TMP39]], <vscale x 8 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP44:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP43]])
-; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> [[TMP40]], <vscale x 8 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP44]], <vscale x 8 x float> [[TMP45]])
-; CHECK-ORDERED-TF-NEXT: [[TMP47:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> [[TMP41]], <vscale x 8 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP48:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP46]], <vscale x 8 x float> [[TMP47]])
-; CHECK-ORDERED-TF-NEXT: [[TMP49:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> [[TMP42]], <vscale x 8 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP50]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP48]], <vscale x 8 x float> [[TMP49]])
+; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP49:%.*]], [[VECTOR_BODY]] ]
+; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i32 0
+; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 8
+; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP19]]
+; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 16
+; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP22]]
+; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = mul i64 [[TMP24]], 24
+; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP25]]
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP17]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP20]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP23]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP26]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
+; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i32 0
+; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP30:%.*]] = mul i64 [[TMP29]], 8
+; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i64 [[TMP30]]
+; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = mul i64 [[TMP32]], 16
+; CHECK-ORDERED-TF-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i64 [[TMP33]]
+; CHECK-ORDERED-TF-NEXT: [[TMP35:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP36:%.*]] = mul i64 [[TMP35]], 24
+; CHECK-ORDERED-TF-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i64 [[TMP36]]
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP28]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP31]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP34]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP37]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[TMP38:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD12]]
+; CHECK-ORDERED-TF-NEXT: [[TMP39:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD13]]
+; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD10]], [[WIDE_MASKED_LOAD14]]
+; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD11]], [[WIDE_MASKED_LOAD15]]
+; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[TMP38]], <vscale x 8 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP42]])
+; CHECK-ORDERED-TF-NEXT: [[TMP44:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> [[TMP39]], <vscale x 8 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP43]], <vscale x 8 x float> [[TMP44]])
+; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> [[TMP40]], <vscale x 8 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP47:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP45]], <vscale x 8 x float> [[TMP46]])
+; CHECK-ORDERED-TF-NEXT: [[TMP48:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> [[TMP41]], <vscale x 8 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP49]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP47]], <vscale x 8 x float> [[TMP48]])
; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
-; CHECK-ORDERED-TF-NEXT: [[TMP51:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP52:%.*]] = mul i64 [[TMP51]], 8
-; CHECK-ORDERED-TF-NEXT: [[TMP53:%.*]] = add i64 [[INDEX]], [[TMP52]]
-; CHECK-ORDERED-TF-NEXT: [[TMP54:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP55:%.*]] = mul i64 [[TMP54]], 16
-; CHECK-ORDERED-TF-NEXT: [[TMP56:%.*]] = add i64 [[INDEX]], [[TMP55]]
-; CHECK-ORDERED-TF-NEXT: [[TMP57:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP58:%.*]] = mul i64 [[TMP57]], 24
-; CHECK-ORDERED-TF-NEXT: [[TMP59:%.*]] = add i64 [[INDEX]], [[TMP58]]
+; CHECK-ORDERED-TF-NEXT: [[TMP50:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP51:%.*]] = mul i64 [[TMP50]], 8
+; CHECK-ORDERED-TF-NEXT: [[TMP52:%.*]] = add i64 [[INDEX]], [[TMP51]]
+; CHECK-ORDERED-TF-NEXT: [[TMP53:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP54:%.*]] = mul i64 [[TMP53]], 16
+; CHECK-ORDERED-TF-NEXT: [[TMP55:%.*]] = add i64 [[INDEX]], [[TMP54]]
+; CHECK-ORDERED-TF-NEXT: [[TMP56:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP57:%.*]] = mul i64 [[TMP56]], 24
+; CHECK-ORDERED-TF-NEXT: [[TMP58:%.*]] = add i64 [[INDEX]], [[TMP57]]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP9]])
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP53]], i64 [[TMP9]])
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT17]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP56]], i64 [[TMP9]])
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT18]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP59]], i64 [[TMP9]])
-; CHECK-ORDERED-TF-NEXT: [[TMP60:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
-; CHECK-ORDERED-TF-NEXT: [[TMP61:%.*]] = extractelement <vscale x 8 x i1> [[TMP60]], i32 0
-; CHECK-ORDERED-TF-NEXT: br i1 [[TMP61]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP52]], i64 [[TMP9]])
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT17]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP55]], i64 [[TMP9]])
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT18]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP58]], i64 [[TMP9]])
+; CHECK-ORDERED-TF-NEXT: [[TMP59:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
+; CHECK-ORDERED-TF-NEXT: [[TMP60:%.*]] = extractelement <vscale x 8 x i1> [[TMP59]], i32 0
+; CHECK-ORDERED-TF-NEXT: br i1 [[TMP60]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK-ORDERED-TF: middle.block:
; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]]
; CHECK-ORDERED-TF: scalar.ph:
@@ -1963,15 +1980,15 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
-; CHECK-ORDERED-TF-NEXT: [[TMP62:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-ORDERED-TF-NEXT: [[TMP61:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
-; CHECK-ORDERED-TF-NEXT: [[TMP63:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
-; CHECK-ORDERED-TF-NEXT: [[MULADD]] = tail call nnan float @llvm.fmuladd.f32(float [[TMP62]], float [[TMP63]], float [[SUM_07]])
+; CHECK-ORDERED-TF-NEXT: [[TMP62:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; CHECK-ORDERED-TF-NEXT: [[MULADD]] = tail call nnan float @llvm.fmuladd.f32(float [[TMP61]], float [[TMP62]], float [[SUM_07]])
; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK-ORDERED-TF: for.end:
-; CHECK-ORDERED-TF-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP50]], [[MIDDLE_BLOCK]] ]
+; CHECK-ORDERED-TF-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP49]], [[MIDDLE_BLOCK]] ]
; CHECK-ORDERED-TF-NEXT: ret float [[MULADD_LCSSA]]
;
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
index 61961fa83b4ea..9cf7bc9fe07d6 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
@@ -141,6 +141,18 @@ define void @test(ptr %p, i64 %a, i8 %b) {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[FOR_COND]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
+; CHECK-NEXT: br label [[EXIT1:%.*]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[FOR_COND1:%.*]]
+; CHECK: for.cond:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH1]] ], [ [[ADD:%.*]], [[FOR_BODY:%.*]] ]
+; CHECK-NEXT: [[ADD]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[CMP_SLT:%.*]] = icmp slt i32 [[IV]], 2
+; CHECK-NEXT: [[SHL:%.*]] = shl i64 [[A]], 48
+; CHECK-NEXT: [[ASHR:%.*]] = ashr i64 [[SHL]], 52
+; CHECK-NEXT: [[TRUNC_I32:%.*]] = trunc
i64 [[ASHR]] to i32 +; CHECK-NEXT: br i1 [[CMP_SLT]], label [[COND_FALSE:%.*]], label [[FOR_BODY]] ; CHECK: cond.false: ; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[B]] to i32 ; CHECK-NEXT: br label [[FOR_BODY]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll index df5d8d55d13d9..29e32131a491f 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll @@ -16,8 +16,10 @@ define void @small_trip_count_min_vlen_128(ptr nocapture %a) nounwind vscale_ran ; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4 ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 4, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP1:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP1]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 [[IV]] @@ -62,8 +64,10 @@ define void @small_trip_count_min_vlen_32(ptr nocapture %a) nounwind vscale_rang ; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4 ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 4, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP1:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP1]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[IV]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll index 1c56527ba0bc9..d6c2f7bab52fa 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll @@ -167,7 +167,6 @@ define void @truncate_to_i1_used_by_branch(i8 %x, ptr %dst) #0 { ; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP10]], 4 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i8 [[X]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[N_VEC]] to i8 ; CHECK-NEXT: [[TMP7:%.*]] = trunc [[BROADCAST_SPLAT]] to ; CHECK-NEXT: [[TMP8:%.*]] = or trunc ( splat (i8 23) to ), [[TMP7]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, ptr [[DST]], i64 0 @@ -190,6 +189,7 @@ define void @truncate_to_i1_used_by_branch(i8 %x, ptr %dst) #0 { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: ; CHECK-NEXT: [[F_039:%.*]] = phi i8 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP_LATCH:.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll index c01a976f96976..44f7255bdd760 100644 --- 
a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll @@ -28,6 +28,8 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL:%.*]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], -1 ; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP7]] ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP5]] to i64 ; IF-EVL-NEXT: [[TMP9:%.*]] = mul i64 0, [[TMP18]] @@ -124,6 +126,7 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL:%.*]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[OFFSET_IDX3:%.*]] = trunc i64 [[EVL_BASED_IV]] to i32 ; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[EVL_BASED_IV]], i64 0 ; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll index ff089c7401d53..adc4c8e2f2d8e 100644 --- a/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll @@ -33,9 +33,9 @@ define void @canonical_small_tc_i8(ptr nocapture noundef writeonly %p) { ; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br i1 true, label %[[END:.*]], label %[[SCALAR_PH]] +; CHECK-NEXT: br label %[[END:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -94,9 +94,9 @@ define void @canonical_upper_limit_i8(ptr nocapture noundef writeonly %p) { ; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br i1 true, label %[[END:.*]], label %[[SCALAR_PH]] +; CHECK-NEXT: br label %[[END:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 256, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -155,9 +155,9 @@ define void 
@canonical_lower_limit_i16(ptr nocapture noundef writeonly %p) { ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 258 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br i1 true, label %[[END:.*]], label %[[SCALAR_PH]] +; CHECK-NEXT: br label %[[END:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 258, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -216,9 +216,9 @@ define void @canonical_upper_limit_i16(ptr nocapture noundef writeonly %p) { ; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 65536 ; CHECK-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br i1 true, label %[[END:.*]], label %[[SCALAR_PH]] +; CHECK-NEXT: br label %[[END:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 65536, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -277,9 +277,9 @@ define void @canonical_lower_limit_i32(ptr nocapture noundef writeonly %p) { ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 65538 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br i1 true, label %[[END:.*]], label %[[SCALAR_PH]] +; CHECK-NEXT: br label %[[END:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 65538, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -338,9 +338,9 @@ define void @canonical_upper_limit_i32(ptr nocapture noundef writeonly %p) { ; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4294967296 ; CHECK-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br i1 true, label %[[END:.*]], label %[[SCALAR_PH]] +; CHECK-NEXT: br label %[[END:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4294967296, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -399,9 +399,9 @@ define void @canonical_lower_limit_i64(ptr nocapture noundef writeonly %p) { ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP8]], 4294967298 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br i1 true, label %[[END:.*]], label %[[SCALAR_PH]] +; CHECK-NEXT: br label %[[END:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4294967298, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; 
CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -460,9 +460,9 @@ define void @canonical_upper_limit_i64(ptr nocapture noundef writeonly %p) { ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP8]], 0 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br i1 true, label %[[END:.*]], label %[[SCALAR_PH]] +; CHECK-NEXT: br label %[[END:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -521,9 +521,9 @@ define void @canonical_lower_limit_i128(ptr nocapture noundef writeonly %p) { ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i256 [[TMP8]], 18446744073709551618 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br i1 true, label %[[END:.*]], label %[[SCALAR_PH]] +; CHECK-NEXT: br label %[[END:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i256 [ 18446744073709551618, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i256 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i256 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] From b6ced8728d8d599d0b42d64de05fc979e0e8b2d3 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 2 Apr 2025 11:52:47 +0100 Subject: [PATCH 6/7] !fixup address latest comments, thanks --- .../Transforms/Vectorize/LoopVectorize.cpp | 9 +- .../LoopVectorize/AArch64/masked-call.ll | 518 +++++------ .../AArch64/scalable-strict-fadd.ll | 876 +++++++++--------- 3 files changed, 702 insertions(+), 701 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 99b37cc8c8843..2abbe86cf58fc 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2661,7 +2661,8 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) { LI, nullptr, Twine(Prefix) + "scalar.ph"); // NOTE: The Plan's scalar preheader isn't replaced with a VPIRBasicBlock // wrapping LoopScalarPreHeader here at the moment, because the Plan's scalar - // preheader may be unreachable at this point. + // preheader may be unreachable at this point. Instead it is replaced in + // createVectorizedLoopSkeleton. } /// Return the expanded step for \p ID using \p ExpandedSCEVs to look up SCEV @@ -7578,10 +7579,10 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() { // Set PlanForEarlyExitLoop to true if the BestPlan has been built from a // loop with an uncountable early exit. The legacy cost model doesn't // properly model costs for such loops. 
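A minimal, self-contained sketch of the predicate the hunk below switches to. It uses a hypothetical Block type with a Succs edge list and a singleSuccessor() helper in place of the real VPlan region classes, so the names and types here are illustrative only, not LLVM API. The idea: once a plan models an uncountable early exit, the vector loop region has a successor besides the middle block, so the test "single successor is the middle block" fails exactly for early-exit plans.

#include <cassert>
#include <vector>

// Hypothetical stand-in for a VPlan block/region: just successor edges.
struct Block {
  std::vector<Block *> Succs;
  // Mirrors getSingleSuccessor(): non-null only with exactly one successor.
  Block *singleSuccessor() const {
    return Succs.size() == 1 ? Succs.front() : nullptr;
  }
};

int main() {
  Block Middle, EarlyExit, Region;

  // Countable loop: the region branches only to the middle block, so it is
  // not flagged as an early-exit plan.
  Region.Succs = {&Middle};
  assert(!(Region.singleSuccessor() != &Middle));

  // Uncountable early exit: a second successor makes singleSuccessor()
  // return null, which compares unequal to the middle block.
  Region.Succs = {&Middle, &EarlyExit};
  assert(Region.singleSuccessor() != &Middle);
  return 0;
}

Unlike counting exit blocks, this formulation reads the structure of the plan's CFG directly; the new code in the hunk additionally guards plans that have no vector loop region at all via the leading getVectorLoopRegion() check.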
- auto ExitBlocks = BestPlan.getExitBlocks(); bool PlanForEarlyExitLoop = - ExitBlocks.size() > 1 || - (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1); + BestPlan.getVectorLoopRegion() && + BestPlan.getVectorLoopRegion()->getSingleSuccessor() != + BestPlan.getMiddleBlock(); assert((BestFactor.Width == LegacyVF.Width || PlanForEarlyExitLoop || planContainsAdditionalSimplifications(getPlanFor(BestFactor.Width), CostCtx, OrigLoop) || diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll index 144beef4da565..585c2df08f7d6 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll @@ -52,63 +52,63 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFCOMMON-LABEL: define void @test_widen( ; TFCOMMON-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0:[0-9]+]] { ; TFCOMMON-NEXT: [[ENTRY:.*]]: -; TFCOMMON-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TFCOMMON-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; TFCOMMON-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; TFCOMMON-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFCOMMON-NEXT: br label %[[VECTOR_BODY:.*]] ; TFCOMMON: [[VECTOR_BODY]]: ; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] -; TFCOMMON-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] -; TFCOMMON-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP2]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; TFCOMMON-NEXT: [[TMP3:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK]]) -; TFCOMMON-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP3]], ptr [[TMP4]], i32 8, [[ACTIVE_LANE_MASK]]) -; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] +; TFCOMMON-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] +; TFCOMMON-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; TFCOMMON-NEXT: [[TMP6:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK]]) +; TFCOMMON-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] +; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP6]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) +; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFCOMMON-NEXT: [[TMP5:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFCOMMON-NEXT: [[TMP6:%.*]] = extractelement [[TMP5]], i32 0 -; TFCOMMON-NEXT: br i1 [[TMP6]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; TFCOMMON-NEXT: [[TMP8:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; TFCOMMON-NEXT: [[TMP9:%.*]] = extractelement [[TMP8]], i32 0 +; TFCOMMON-NEXT: br i1 [[TMP9]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TFCOMMON: [[FOR_COND_CLEANUP]]: ; TFCOMMON-NEXT: ret void ; ; TFA_INTERLEAVE-LABEL: define void @test_widen( ; 
TFA_INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0:[0-9]+]] { ; TFA_INTERLEAVE-NEXT: [[ENTRY:.*]]: -; TFA_INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 -; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 +; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) -; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP3]], i64 1025) +; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025) ; TFA_INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] ; TFA_INTERLEAVE: [[VECTOR_BODY]]: ; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT4:%.*]], %[[VECTOR_BODY]] ] -; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] -; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP4]], i64 [[TMP6]] -; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP4]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK2]], poison) -; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK]]) -; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD3]], [[ACTIVE_LANE_MASK2]]) -; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 [[TMP12]] -; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP8]], ptr [[TMP10]], i32 8, [[ACTIVE_LANE_MASK]]) -; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP9]], ptr [[TMP13]], i32 8, [[ACTIVE_LANE_MASK2]]) -; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] +; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] +; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[TMP7]], i64 [[TMP9]] +; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP10]], i32 8, [[ACTIVE_LANE_MASK2]], poison) +; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], 
[[ACTIVE_LANE_MASK]]) +; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD3]], [[ACTIVE_LANE_MASK2]]) +; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() ; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = add i64 [[INDEX_NEXT]], [[TMP15]] +; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i64 [[TMP15]] +; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP11]], ptr [[TMP13]], i32 8, [[ACTIVE_LANE_MASK]]) +; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP12]], ptr [[TMP16]], i32 8, [[ACTIVE_LANE_MASK2]]) +; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = add i64 [[INDEX_NEXT]], [[TMP18]] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP16]], i64 1025) -; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = extractelement [[TMP17]], i32 0 -; TFA_INTERLEAVE-NEXT: br i1 [[TMP18]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP19]], i64 1025) +; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = extractelement [[TMP20]], i32 0 +; TFA_INTERLEAVE-NEXT: br i1 [[TMP21]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TFA_INTERLEAVE: [[FOR_COND_CLEANUP]]: ; TFA_INTERLEAVE-NEXT: ret void ; @@ -183,72 +183,72 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFCOMMON-LABEL: define void @test_if_then( ; TFCOMMON-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] { ; TFCOMMON-NEXT: [[ENTRY:.*]]: -; TFCOMMON-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TFCOMMON-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; TFCOMMON-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; TFCOMMON-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFCOMMON-NEXT: br label %[[VECTOR_BODY:.*]] ; TFCOMMON: [[VECTOR_BODY]]: ; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] -; TFCOMMON-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; TFCOMMON-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP2]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; TFCOMMON-NEXT: [[TMP3:%.*]] = icmp ugt [[WIDE_MASKED_LOAD]], splat (i64 50) -; TFCOMMON-NEXT: [[TMP4:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP3]], zeroinitializer -; TFCOMMON-NEXT: [[TMP5:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[TMP4]]) -; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select [[TMP4]], [[TMP5]], zeroinitializer -; TFCOMMON-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], 
i64 [[INDEX]] -; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP6]], i32 8, [[ACTIVE_LANE_MASK]]) -; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] +; TFCOMMON-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] +; TFCOMMON-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; TFCOMMON-NEXT: [[TMP6:%.*]] = icmp ugt [[WIDE_MASKED_LOAD]], splat (i64 50) +; TFCOMMON-NEXT: [[TMP7:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP6]], zeroinitializer +; TFCOMMON-NEXT: [[TMP8:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[TMP7]]) +; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select [[TMP7]], [[TMP8]], zeroinitializer +; TFCOMMON-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] +; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]]) +; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFCOMMON-NEXT: [[TMP7:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFCOMMON-NEXT: [[TMP8:%.*]] = extractelement [[TMP7]], i32 0 -; TFCOMMON-NEXT: br i1 [[TMP8]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; TFCOMMON-NEXT: [[TMP10:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; TFCOMMON-NEXT: [[TMP11:%.*]] = extractelement [[TMP10]], i32 0 +; TFCOMMON-NEXT: br i1 [[TMP11]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; TFCOMMON: [[FOR_COND_CLEANUP]]: ; TFCOMMON-NEXT: ret void ; ; TFA_INTERLEAVE-LABEL: define void @test_if_then( ; TFA_INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] { ; TFA_INTERLEAVE-NEXT: [[ENTRY:.*]]: -; TFA_INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 -; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 +; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) -; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP3]], i64 1025) +; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025) ; TFA_INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] ; TFA_INTERLEAVE: [[VECTOR_BODY]]: ; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT5:%.*]], %[[VECTOR_BODY]] ] -; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 
[[TMP6]] -; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP4]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK2]], poison) -; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = icmp ugt [[WIDE_MASKED_LOAD]], splat (i64 50) -; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = icmp ugt [[WIDE_MASKED_LOAD3]], splat (i64 50) -; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP8]], zeroinitializer -; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = select [[ACTIVE_LANE_MASK2]], [[TMP9]], zeroinitializer -; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[TMP10]]) -; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD3]], [[TMP11]]) -; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select [[TMP10]], [[TMP12]], zeroinitializer -; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select [[TMP11]], [[TMP13]], zeroinitializer -; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] -; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP14]], i64 [[TMP16]] -; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP14]], i32 8, [[ACTIVE_LANE_MASK]]) -; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI4]], ptr [[TMP17]], i32 8, [[ACTIVE_LANE_MASK2]]) -; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] +; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] +; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 [[TMP9]] +; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP10]], i32 8, [[ACTIVE_LANE_MASK2]], poison) +; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = icmp ugt [[WIDE_MASKED_LOAD]], splat (i64 50) +; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = icmp ugt [[WIDE_MASKED_LOAD3]], splat (i64 50) +; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP11]], zeroinitializer +; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = select [[ACTIVE_LANE_MASK2]], [[TMP12]], zeroinitializer +; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[TMP13]]) +; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD3]], [[TMP14]]) +; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select [[TMP13]], [[TMP15]], zeroinitializer +; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select [[TMP14]], [[TMP16]], zeroinitializer +; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] ; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() ; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = add i64 [[INDEX_NEXT]], [[TMP19]] +; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[TMP17]], i64 [[TMP19]] +; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP17]], i32 8, [[ACTIVE_LANE_MASK]]) +; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI4]], ptr 
[[TMP20]], i32 8, [[ACTIVE_LANE_MASK2]]) +; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP23:%.*]] = add i64 [[INDEX_NEXT]], [[TMP22]] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT5]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP20]], i64 1025) -; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = extractelement [[TMP21]], i32 0 -; TFA_INTERLEAVE-NEXT: br i1 [[TMP22]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT5]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP23]], i64 1025) +; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = extractelement [[TMP24]], i32 0 +; TFA_INTERLEAVE-NEXT: br i1 [[TMP25]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; TFA_INTERLEAVE: [[FOR_COND_CLEANUP]]: ; TFA_INTERLEAVE-NEXT: ret void ; @@ -339,81 +339,81 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFCOMMON-LABEL: define void @test_widen_if_then_else( ; TFCOMMON-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] { ; TFCOMMON-NEXT: [[ENTRY:.*]]: -; TFCOMMON-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TFCOMMON-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; TFCOMMON-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; TFCOMMON-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFCOMMON-NEXT: br label %[[VECTOR_BODY:.*]] ; TFCOMMON: [[VECTOR_BODY]]: ; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] -; TFCOMMON-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; TFCOMMON-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP2]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; TFCOMMON-NEXT: [[TMP3:%.*]] = icmp ugt [[WIDE_MASKED_LOAD]], splat (i64 50) -; TFCOMMON-NEXT: [[TMP4:%.*]] = xor [[TMP3]], splat (i1 true) -; TFCOMMON-NEXT: [[TMP5:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP4]], zeroinitializer -; TFCOMMON-NEXT: [[TMP6:%.*]] = call @foo_vector( zeroinitializer, [[TMP5]]) -; TFCOMMON-NEXT: [[TMP7:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP3]], zeroinitializer -; TFCOMMON-NEXT: [[TMP8:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[TMP7]]) -; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select [[TMP5]], [[TMP6]], [[TMP8]] -; TFCOMMON-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] -; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]]) -; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] +; TFCOMMON-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] +; TFCOMMON-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; TFCOMMON-NEXT: [[TMP6:%.*]] = icmp ugt [[WIDE_MASKED_LOAD]], 
splat (i64 50) +; TFCOMMON-NEXT: [[TMP7:%.*]] = xor [[TMP6]], splat (i1 true) +; TFCOMMON-NEXT: [[TMP8:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP7]], zeroinitializer +; TFCOMMON-NEXT: [[TMP9:%.*]] = call @foo_vector( zeroinitializer, [[TMP8]]) +; TFCOMMON-NEXT: [[TMP10:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP6]], zeroinitializer +; TFCOMMON-NEXT: [[TMP11:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[TMP10]]) +; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select [[TMP8]], [[TMP9]], [[TMP11]] +; TFCOMMON-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] +; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP12]], i32 8, [[ACTIVE_LANE_MASK]]) +; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFCOMMON-NEXT: [[TMP10:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFCOMMON-NEXT: [[TMP11:%.*]] = extractelement [[TMP10]], i32 0 -; TFCOMMON-NEXT: br i1 [[TMP11]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; TFCOMMON-NEXT: [[TMP13:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; TFCOMMON-NEXT: [[TMP14:%.*]] = extractelement [[TMP13]], i32 0 +; TFCOMMON-NEXT: br i1 [[TMP14]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; TFCOMMON: [[FOR_COND_CLEANUP]]: ; TFCOMMON-NEXT: ret void ; ; TFA_INTERLEAVE-LABEL: define void @test_widen_if_then_else( ; TFA_INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] { ; TFA_INTERLEAVE-NEXT: [[ENTRY:.*]]: -; TFA_INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 -; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 +; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) -; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP3]], i64 1025) +; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025) ; TFA_INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] ; TFA_INTERLEAVE: [[VECTOR_BODY]]: ; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT5:%.*]], %[[VECTOR_BODY]] ] -; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 [[TMP6]] -; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP4]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call 
@llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK2]], poison) -; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = icmp ugt [[WIDE_MASKED_LOAD]], splat (i64 50) -; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = icmp ugt [[WIDE_MASKED_LOAD3]], splat (i64 50) -; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = xor [[TMP8]], splat (i1 true) -; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = xor [[TMP9]], splat (i1 true) -; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP10]], zeroinitializer -; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = select [[ACTIVE_LANE_MASK2]], [[TMP11]], zeroinitializer -; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = call @foo_vector( zeroinitializer, [[TMP12]]) -; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = call @foo_vector( zeroinitializer, [[TMP13]]) -; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP8]], zeroinitializer -; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = select [[ACTIVE_LANE_MASK2]], [[TMP9]], zeroinitializer -; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[TMP16]]) -; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD3]], [[TMP17]]) -; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select [[TMP12]], [[TMP14]], [[TMP18]] -; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select [[TMP13]], [[TMP15]], [[TMP19]] -; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] -; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[TMP20]], i64 [[TMP22]] -; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP20]], i32 8, [[ACTIVE_LANE_MASK]]) -; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI4]], ptr [[TMP23]], i32 8, [[ACTIVE_LANE_MASK2]]) -; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] +; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] +; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 [[TMP9]] +; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP10]], i32 8, [[ACTIVE_LANE_MASK2]], poison) +; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = icmp ugt [[WIDE_MASKED_LOAD]], splat (i64 50) +; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = icmp ugt [[WIDE_MASKED_LOAD3]], splat (i64 50) +; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = xor [[TMP11]], splat (i1 true) +; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = xor [[TMP12]], splat (i1 true) +; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP13]], zeroinitializer +; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = select [[ACTIVE_LANE_MASK2]], [[TMP14]], zeroinitializer +; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = call @foo_vector( zeroinitializer, [[TMP15]]) +; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = call @foo_vector( zeroinitializer, [[TMP16]]) +; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP11]], zeroinitializer +; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = select [[ACTIVE_LANE_MASK2]], [[TMP12]], zeroinitializer +; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[TMP19]]) +; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = call @foo_vector( 
[[WIDE_MASKED_LOAD3]], [[TMP20]]) +; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select [[TMP15]], [[TMP17]], [[TMP21]] +; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select [[TMP16]], [[TMP18]], [[TMP22]] +; TFA_INTERLEAVE-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] ; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64() ; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = mul i64 [[TMP24]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = add i64 [[INDEX_NEXT]], [[TMP25]] +; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[TMP23]], i64 [[TMP25]] +; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP23]], i32 8, [[ACTIVE_LANE_MASK]]) +; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI4]], ptr [[TMP26]], i32 8, [[ACTIVE_LANE_MASK2]]) +; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; TFA_INTERLEAVE-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP28:%.*]] = mul i64 [[TMP27]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP29:%.*]] = add i64 [[INDEX_NEXT]], [[TMP28]] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT5]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP26]], i64 1025) -; TFA_INTERLEAVE-NEXT: [[TMP27:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFA_INTERLEAVE-NEXT: [[TMP28:%.*]] = extractelement [[TMP27]], i32 0 -; TFA_INTERLEAVE-NEXT: br i1 [[TMP28]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT5]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP29]], i64 1025) +; TFA_INTERLEAVE-NEXT: [[TMP30:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; TFA_INTERLEAVE-NEXT: [[TMP31:%.*]] = extractelement [[TMP30]], i32 0 +; TFA_INTERLEAVE-NEXT: br i1 [[TMP31]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; TFA_INTERLEAVE: [[FOR_COND_CLEANUP]]: ; TFA_INTERLEAVE-NEXT: ret void ; @@ -623,86 +623,86 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFALWAYS-LABEL: define void @test_widen_optmask( ; TFALWAYS-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] { ; TFALWAYS-NEXT: [[ENTRY:.*]]: -; TFALWAYS-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TFALWAYS-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; TFALWAYS-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; TFALWAYS-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 ; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFALWAYS-NEXT: br label %[[VECTOR_BODY:.*]] ; TFALWAYS: [[VECTOR_BODY]]: ; TFALWAYS-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] -; TFALWAYS-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] -; TFALWAYS-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP2]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; TFALWAYS-NEXT: [[TMP3:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK]]) -; TFALWAYS-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; TFALWAYS-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP3]], ptr [[TMP4]], i32 8, [[ACTIVE_LANE_MASK]]) -; TFALWAYS-NEXT: 
[[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] +; TFALWAYS-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] +; TFALWAYS-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; TFALWAYS-NEXT: [[TMP6:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK]]) +; TFALWAYS-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] +; TFALWAYS-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP6]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) +; TFALWAYS-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFALWAYS-NEXT: [[TMP5:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFALWAYS-NEXT: [[TMP6:%.*]] = extractelement [[TMP5]], i32 0 -; TFALWAYS-NEXT: br i1 [[TMP6]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; TFALWAYS-NEXT: [[TMP8:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; TFALWAYS-NEXT: [[TMP9:%.*]] = extractelement [[TMP8]], i32 0 +; TFALWAYS-NEXT: br i1 [[TMP9]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; TFALWAYS: [[FOR_COND_CLEANUP]]: ; TFALWAYS-NEXT: ret void ; ; TFFALLBACK-LABEL: define void @test_widen_optmask( ; TFFALLBACK-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] { ; TFFALLBACK-NEXT: [[ENTRY:.*]]: -; TFFALLBACK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TFFALLBACK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; TFFALLBACK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; TFFALLBACK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 ; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFFALLBACK-NEXT: br label %[[VECTOR_BODY:.*]] ; TFFALLBACK: [[VECTOR_BODY]]: ; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] -; TFFALLBACK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] -; TFFALLBACK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP2]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; TFFALLBACK-NEXT: [[TMP3:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK]]) -; TFFALLBACK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; TFFALLBACK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP3]], ptr [[TMP4]], i32 8, [[ACTIVE_LANE_MASK]]) -; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] +; TFFALLBACK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] +; TFFALLBACK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; TFFALLBACK-NEXT: [[TMP6:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK]]) +; TFFALLBACK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] +; TFFALLBACK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP6]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) +; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFFALLBACK-NEXT: [[TMP5:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFFALLBACK-NEXT: [[TMP6:%.*]] = 
extractelement [[TMP5]], i32 0 -; TFFALLBACK-NEXT: br i1 [[TMP6]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; TFFALLBACK-NEXT: [[TMP8:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; TFFALLBACK-NEXT: [[TMP9:%.*]] = extractelement [[TMP8]], i32 0 +; TFFALLBACK-NEXT: br i1 [[TMP9]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; TFFALLBACK: [[FOR_COND_CLEANUP]]: ; TFFALLBACK-NEXT: ret void ; ; TFA_INTERLEAVE-LABEL: define void @test_widen_optmask( ; TFA_INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] { ; TFA_INTERLEAVE-NEXT: [[ENTRY:.*]]: -; TFA_INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 -; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 +; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) -; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP3]], i64 1025) +; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025) ; TFA_INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] ; TFA_INTERLEAVE: [[VECTOR_BODY]]: ; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT4:%.*]], %[[VECTOR_BODY]] ] -; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] -; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP4]], i64 [[TMP6]] -; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP4]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK2]], poison) -; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK]]) -; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD3]], [[ACTIVE_LANE_MASK2]]) -; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 [[TMP12]] -; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP8]], ptr [[TMP10]], i32 8, [[ACTIVE_LANE_MASK]]) -; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP9]], ptr [[TMP13]], i32 8, [[ACTIVE_LANE_MASK2]]) -; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] +; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] +; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = 
call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[TMP7]], i64 [[TMP9]] +; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP10]], i32 8, [[ACTIVE_LANE_MASK2]], poison) +; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK]]) +; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD3]], [[ACTIVE_LANE_MASK2]]) +; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() ; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = add i64 [[INDEX_NEXT]], [[TMP15]] +; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i64 [[TMP15]] +; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP11]], ptr [[TMP13]], i32 8, [[ACTIVE_LANE_MASK]]) +; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP12]], ptr [[TMP16]], i32 8, [[ACTIVE_LANE_MASK2]]) +; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = add i64 [[INDEX_NEXT]], [[TMP18]] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP16]], i64 1025) -; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = extractelement [[TMP17]], i32 0 -; TFA_INTERLEAVE-NEXT: br i1 [[TMP18]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP19]], i64 1025) +; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = extractelement [[TMP20]], i32 0 +; TFA_INTERLEAVE-NEXT: br i1 [[TMP21]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; TFA_INTERLEAVE: [[FOR_COND_CLEANUP]]: ; TFA_INTERLEAVE-NEXT: ret void ; @@ -782,8 +782,8 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFALWAYS-LABEL: define double @test_widen_fmuladd_and_call( ; TFALWAYS-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], double [[M:%.*]]) #[[ATTR0]] { ; TFALWAYS-NEXT: [[ENTRY:.*]]: -; TFALWAYS-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TFALWAYS-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; TFALWAYS-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; TFALWAYS-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 ; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFALWAYS-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, double [[M]], i64 0 ; TFALWAYS-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -791,29 +791,29 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFALWAYS: [[VECTOR_BODY]]: ; TFALWAYS-NEXT: 
[[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] -; TFALWAYS-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] -; TFALWAYS-NEXT: [[TMP2:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] -; TFALWAYS-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr [[TMP2]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; TFALWAYS-NEXT: [[TMP3:%.*]] = fmul [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] -; TFALWAYS-NEXT: [[TMP4:%.*]] = fptoui [[WIDE_MASKED_LOAD]] to -; TFALWAYS-NEXT: [[TMP5:%.*]] = call @foo_vector( [[TMP4]], [[ACTIVE_LANE_MASK]]) -; TFALWAYS-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; TFALWAYS-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP5]], ptr [[TMP6]], i32 8, [[ACTIVE_LANE_MASK]]) -; TFALWAYS-NEXT: [[TMP7:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP3]], splat (double -0.000000e+00) -; TFALWAYS-NEXT: [[TMP8]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], [[TMP7]]) -; TFALWAYS-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] +; TFALWAYS-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] +; TFALWAYS-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] +; TFALWAYS-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; TFALWAYS-NEXT: [[TMP6:%.*]] = fmul [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] +; TFALWAYS-NEXT: [[TMP7:%.*]] = fptoui [[WIDE_MASKED_LOAD]] to +; TFALWAYS-NEXT: [[TMP8:%.*]] = call @foo_vector( [[TMP7]], [[ACTIVE_LANE_MASK]]) +; TFALWAYS-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] +; TFALWAYS-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP8]], ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]]) +; TFALWAYS-NEXT: [[TMP10:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP6]], splat (double -0.000000e+00) +; TFALWAYS-NEXT: [[TMP11]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], [[TMP10]]) +; TFALWAYS-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFALWAYS-NEXT: [[TMP9:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFALWAYS-NEXT: [[TMP10:%.*]] = extractelement [[TMP9]], i32 0 -; TFALWAYS-NEXT: br i1 [[TMP10]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; TFALWAYS-NEXT: [[TMP12:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; TFALWAYS-NEXT: [[TMP13:%.*]] = extractelement [[TMP12]], i32 0 +; TFALWAYS-NEXT: br i1 [[TMP13]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; TFALWAYS: [[FOR_COND_CLEANUP]]: -; TFALWAYS-NEXT: ret double [[TMP8]] +; TFALWAYS-NEXT: ret double [[TMP11]] ; ; TFFALLBACK-LABEL: define double @test_widen_fmuladd_and_call( ; TFFALLBACK-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], double [[M:%.*]]) #[[ATTR0]] { ; TFFALLBACK-NEXT: [[ENTRY:.*]]: -; TFFALLBACK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TFFALLBACK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; TFFALLBACK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; TFFALLBACK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 ; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 
0, i64 1025) ; TFFALLBACK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, double [[M]], i64 0 ; TFFALLBACK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -821,33 +821,33 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFFALLBACK: [[VECTOR_BODY]]: ; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] -; TFFALLBACK-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] -; TFFALLBACK-NEXT: [[TMP2:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] -; TFFALLBACK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr [[TMP2]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; TFFALLBACK-NEXT: [[TMP3:%.*]] = fmul [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] -; TFFALLBACK-NEXT: [[TMP4:%.*]] = fptoui [[WIDE_MASKED_LOAD]] to -; TFFALLBACK-NEXT: [[TMP5:%.*]] = call @foo_vector( [[TMP4]], [[ACTIVE_LANE_MASK]]) -; TFFALLBACK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; TFFALLBACK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP5]], ptr [[TMP6]], i32 8, [[ACTIVE_LANE_MASK]]) -; TFFALLBACK-NEXT: [[TMP7:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP3]], splat (double -0.000000e+00) -; TFFALLBACK-NEXT: [[TMP8]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], [[TMP7]]) -; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] +; TFFALLBACK-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] +; TFFALLBACK-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] +; TFFALLBACK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; TFFALLBACK-NEXT: [[TMP6:%.*]] = fmul [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] +; TFFALLBACK-NEXT: [[TMP7:%.*]] = fptoui [[WIDE_MASKED_LOAD]] to +; TFFALLBACK-NEXT: [[TMP8:%.*]] = call @foo_vector( [[TMP7]], [[ACTIVE_LANE_MASK]]) +; TFFALLBACK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] +; TFFALLBACK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP8]], ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]]) +; TFFALLBACK-NEXT: [[TMP10:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP6]], splat (double -0.000000e+00) +; TFFALLBACK-NEXT: [[TMP11]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], [[TMP10]]) +; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFFALLBACK-NEXT: [[TMP9:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFFALLBACK-NEXT: [[TMP10:%.*]] = extractelement [[TMP9]], i32 0 -; TFFALLBACK-NEXT: br i1 [[TMP10]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; TFFALLBACK-NEXT: [[TMP12:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; TFFALLBACK-NEXT: [[TMP13:%.*]] = extractelement [[TMP12]], i32 0 +; TFFALLBACK-NEXT: br i1 [[TMP13]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; TFFALLBACK: [[FOR_COND_CLEANUP]]: -; TFFALLBACK-NEXT: ret double [[TMP8]] +; TFFALLBACK-NEXT: ret double [[TMP11]] ; ; TFA_INTERLEAVE-LABEL: define double @test_widen_fmuladd_and_call( ; TFA_INTERLEAVE-SAME: ptr 
noalias [[A:%.*]], ptr readnone [[B:%.*]], double [[M:%.*]]) #[[ATTR0]] { ; TFA_INTERLEAVE-NEXT: [[ENTRY:.*]]: -; TFA_INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 -; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 +; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) -; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP3]], i64 1025) +; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025) ; TFA_INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, double [[M]], i64 0 ; TFA_INTERLEAVE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; TFA_INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] @@ -855,40 +855,40 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT4:%.*]], %[[VECTOR_BODY]] ] -; TFA_INTERLEAVE-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ] -; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] -; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[TMP4]], i64 [[TMP6]] -; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr [[TMP4]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK2]], poison) -; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = fmul [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] -; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = fmul [[WIDE_MASKED_LOAD3]], [[BROADCAST_SPLAT]] -; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = fptoui [[WIDE_MASKED_LOAD]] to -; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = fptoui [[WIDE_MASKED_LOAD3]] to -; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = call @foo_vector( [[TMP10]], [[ACTIVE_LANE_MASK]]) -; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = call @foo_vector( [[TMP11]], [[ACTIVE_LANE_MASK2]]) -; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP14]], i64 [[TMP16]] -; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP12]], ptr [[TMP14]], i32 8, [[ACTIVE_LANE_MASK]]) -; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP13]], ptr [[TMP17]], i32 8, [[ACTIVE_LANE_MASK2]]) -; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = 
select [[ACTIVE_LANE_MASK]], [[TMP8]], splat (double -0.000000e+00) -; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], [[TMP18]]) -; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = select [[ACTIVE_LANE_MASK2]], [[TMP9]], splat (double -0.000000e+00) -; TFA_INTERLEAVE-NEXT: [[TMP21]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[TMP19]], [[TMP20]]) -; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] -; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = add i64 [[INDEX_NEXT]], [[TMP23]] +; TFA_INTERLEAVE-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP24:%.*]], %[[VECTOR_BODY]] ] +; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] +; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[TMP7]], i64 [[TMP9]] +; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call @llvm.masked.load.nxv2f64.p0(ptr [[TMP10]], i32 8, [[ACTIVE_LANE_MASK2]], poison) +; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = fmul [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] +; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = fmul [[WIDE_MASKED_LOAD3]], [[BROADCAST_SPLAT]] +; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = fptoui [[WIDE_MASKED_LOAD]] to +; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = fptoui [[WIDE_MASKED_LOAD3]] to +; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = call @foo_vector( [[TMP13]], [[ACTIVE_LANE_MASK]]) +; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = call @foo_vector( [[TMP14]], [[ACTIVE_LANE_MASK2]]) +; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] +; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[TMP17]], i64 [[TMP19]] +; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP15]], ptr [[TMP17]], i32 8, [[ACTIVE_LANE_MASK]]) +; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP16]], ptr [[TMP20]], i32 8, [[ACTIVE_LANE_MASK2]]) +; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP11]], splat (double -0.000000e+00) +; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], [[TMP21]]) +; TFA_INTERLEAVE-NEXT: [[TMP23:%.*]] = select [[ACTIVE_LANE_MASK2]], [[TMP12]], splat (double -0.000000e+00) +; TFA_INTERLEAVE-NEXT: [[TMP24]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[TMP22]], [[TMP23]]) +; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP27:%.*]] = add i64 [[INDEX_NEXT]], [[TMP26]] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP24]], i64 1025) -; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = extractelement [[TMP25]], i32 0 -; 
TFA_INTERLEAVE-NEXT: br i1 [[TMP26]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP27]], i64 1025) +; TFA_INTERLEAVE-NEXT: [[TMP28:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; TFA_INTERLEAVE-NEXT: [[TMP29:%.*]] = extractelement [[TMP28]], i32 0 +; TFA_INTERLEAVE-NEXT: br i1 [[TMP29]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; TFA_INTERLEAVE: [[FOR_COND_CLEANUP]]: -; TFA_INTERLEAVE-NEXT: ret double [[TMP21]] +; TFA_INTERLEAVE-NEXT: ret double [[TMP24]] ; entry: br label %for.body @@ -996,7 +996,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = icmp ult i64 1, [[TMP0]] ; TFA_INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] ; TFA_INTERLEAVE: [[VECTOR_BODY]]: -; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE5:.*]] ] +; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[TMP27:%.*]], %[[PRED_STORE_CONTINUE5:.*]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE5]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT6:%.*]], %[[PRED_STORE_CONTINUE5]] ] ; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = load double, ptr [[P2]], align 8 @@ -1005,8 +1005,8 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7:[0-9]+]] ; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = fcmp ogt double [[TMP5]], 0.000000e+00 ; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = xor i1 [[TMP6]], true -; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP7]], double 1.000000e+00, double 0.000000e+00 -; TFA_INTERLEAVE-NEXT: store double [[PREDPHI]], ptr [[P]], align 8 +; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = select i1 [[TMP7]], double 1.000000e+00, double 0.000000e+00 +; TFA_INTERLEAVE-NEXT: store double [[TMP24]], ptr [[P]], align 8 ; TFA_INTERLEAVE-NEXT: br label %[[PRED_STORE_CONTINUE]] ; TFA_INTERLEAVE: [[PRED_STORE_CONTINUE]]: ; TFA_INTERLEAVE-NEXT: br i1 [[ACTIVE_LANE_MASK2]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5]] @@ -1014,11 +1014,11 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7]] ; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = fcmp ogt double [[TMP8]], 0.000000e+00 ; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = xor i1 [[TMP9]], true -; TFA_INTERLEAVE-NEXT: [[PREDPHI3:%.*]] = select i1 [[TMP10]], double 1.000000e+00, double 0.000000e+00 -; TFA_INTERLEAVE-NEXT: store double [[PREDPHI3]], ptr [[P]], align 8 +; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = select i1 [[TMP10]], double 1.000000e+00, double 0.000000e+00 +; TFA_INTERLEAVE-NEXT: store double [[TMP26]], ptr [[P]], align 8 ; TFA_INTERLEAVE-NEXT: br label %[[PRED_STORE_CONTINUE5]] ; TFA_INTERLEAVE: [[PRED_STORE_CONTINUE5]]: -; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP27]] = add i64 [[INDEX]], 2 ; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = icmp ult i64 [[INDEX]], [[TMP3]] ; TFA_INTERLEAVE-NEXT: 
[[ACTIVE_LANE_MASK_NEXT6]] = icmp ult i64 [[TMP11]], [[TMP3]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll index 82f64e988e6d3..8ba3debd46c8e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll @@ -230,54 +230,54 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UNORDERED: vector.body: ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 0 -; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8 -; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP9]] -; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 16 -; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP12]] -; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 24 -; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP15]] -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP7]], align 4 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP10]], align 4 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load , ptr [[TMP13]], align 4 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load , ptr [[TMP16]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP17]] = fadd [[WIDE_LOAD]], [[VEC_PHI]] -; CHECK-UNORDERED-NEXT: [[TMP18]] = fadd [[WIDE_LOAD4]], [[VEC_PHI1]] -; CHECK-UNORDERED-NEXT: [[TMP19]] = fadd [[WIDE_LOAD5]], [[VEC_PHI2]] -; CHECK-UNORDERED-NEXT: [[TMP20]] = fadd [[WIDE_LOAD6]], [[VEC_PHI3]] +; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 +; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; 
CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8 +; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP10]] +; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16 +; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP13]] +; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 24 +; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP16]] +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP11]], align 4 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load , ptr [[TMP14]], align 4 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load , ptr [[TMP17]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP18]] = fadd [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-UNORDERED-NEXT: [[TMP19]] = fadd [[WIDE_LOAD4]], [[VEC_PHI1]] +; CHECK-UNORDERED-NEXT: [[TMP20]] = fadd [[WIDE_LOAD5]], [[VEC_PHI2]] +; CHECK-UNORDERED-NEXT: [[TMP21]] = fadd [[WIDE_LOAD6]], [[VEC_PHI3]] ; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-UNORDERED-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-UNORDERED-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-UNORDERED-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-UNORDERED-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-UNORDERED: middle.block: -; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd [[TMP18]], [[TMP17]] -; CHECK-UNORDERED-NEXT: [[BIN_RDX7:%.*]] = fadd [[TMP19]], [[BIN_RDX]] -; CHECK-UNORDERED-NEXT: [[BIN_RDX8:%.*]] = fadd [[TMP20]], [[BIN_RDX7]] -; CHECK-UNORDERED-NEXT: [[TMP22:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, [[BIN_RDX8]]) +; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd [[TMP19]], [[TMP18]] +; CHECK-UNORDERED-NEXT: [[BIN_RDX7:%.*]] = fadd [[TMP20]], [[BIN_RDX]] +; CHECK-UNORDERED-NEXT: [[BIN_RDX8:%.*]] = fadd [[TMP21]], [[BIN_RDX7]] +; CHECK-UNORDERED-NEXT: [[TMP23:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, [[BIN_RDX8]]) ; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-UNORDERED: scalar.ph: ; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP22]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP23]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] ; CHECK-UNORDERED-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-UNORDERED: for.body: ; CHECK-UNORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-UNORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] ; CHECK-UNORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-UNORDERED-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-UNORDERED-NEXT: [[ADD]] = fadd float [[TMP23]], [[SUM_07]] +; CHECK-UNORDERED-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; 
CHECK-UNORDERED-NEXT: [[ADD]] = fadd float [[TMP24]], [[SUM_07]] ; CHECK-UNORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-UNORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-UNORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK-UNORDERED: for.end: -; CHECK-UNORDERED-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP22]], [[MIDDLE_BLOCK]] ] +; CHECK-UNORDERED-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ] ; CHECK-UNORDERED-NEXT: ret float [[ADD_LCSSA]] ; ; CHECK-ORDERED-LABEL: define float @fadd_strict_unroll @@ -297,47 +297,47 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-ORDERED: vector.body: ; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 0 -; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8 -; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP9]] -; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 16 -; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP12]] -; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 24 -; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP15]] -; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP7]], align 4 -; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP10]], align 4 -; CHECK-ORDERED-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP13]], align 4 -; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP16]], align 4 -; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], [[WIDE_LOAD]]) -; CHECK-ORDERED-NEXT: [[TMP18:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP17]], [[WIDE_LOAD1]]) -; CHECK-ORDERED-NEXT: [[TMP19:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP18]], [[WIDE_LOAD2]]) -; CHECK-ORDERED-NEXT: [[TMP20]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP19]], [[WIDE_LOAD3]]) +; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 +; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8 +; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP10]] +; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16 +; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP13]] +; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = 
mul i64 [[TMP15]], 24 +; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP16]] +; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 +; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP11]], align 4 +; CHECK-ORDERED-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP14]], align 4 +; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP17]], align 4 +; CHECK-ORDERED-NEXT: [[TMP18:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], [[WIDE_LOAD]]) +; CHECK-ORDERED-NEXT: [[TMP19:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP18]], [[WIDE_LOAD1]]) +; CHECK-ORDERED-NEXT: [[TMP20:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP19]], [[WIDE_LOAD2]]) +; CHECK-ORDERED-NEXT: [[TMP21]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP20]], [[WIDE_LOAD3]]) ; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-ORDERED-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-ORDERED-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-ORDERED-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-ORDERED-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-ORDERED: middle.block: ; CHECK-ORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-ORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-ORDERED: scalar.ph: ; CHECK-ORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP20]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP21]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] ; CHECK-ORDERED-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED: for.body: ; CHECK-ORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-ORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] ; CHECK-ORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-ORDERED-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-ORDERED-NEXT: [[ADD]] = fadd float [[TMP22]], [[SUM_07]] +; CHECK-ORDERED-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-ORDERED-NEXT: [[ADD]] = fadd float [[TMP23]], [[SUM_07]] ; CHECK-ORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-ORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-ORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK-ORDERED: for.end: -; CHECK-ORDERED-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-NEXT: ret float [[ADD_LCSSA]] ; ; CHECK-ORDERED-TF-LABEL: define float @fadd_strict_unroll @@ -378,47 +378,47 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK6:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY3]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT12:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK7:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY4]], [[VECTOR_PH]] ], [ 
[[ACTIVE_LANE_MASK_NEXT13:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT14:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP34:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i32 0 -; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 8 -; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP19]] -; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 16 -; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP22]] -; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = mul i64 [[TMP24]], 24 -; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP25]] -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP17]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP20]], i32 4, [[ACTIVE_LANE_MASK6]], poison) -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP23]], i32 4, [[ACTIVE_LANE_MASK7]], poison) -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP26]], i32 4, [[ACTIVE_LANE_MASK8]], poison) -; CHECK-ORDERED-TF-NEXT: [[TMP27:%.*]] = select [[ACTIVE_LANE_MASK]], [[WIDE_MASKED_LOAD]], splat (float -0.000000e+00) -; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], [[TMP27]]) -; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = select [[ACTIVE_LANE_MASK6]], [[WIDE_MASKED_LOAD9]], splat (float -0.000000e+00) -; CHECK-ORDERED-TF-NEXT: [[TMP30:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP28]], [[TMP29]]) -; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = select [[ACTIVE_LANE_MASK7]], [[WIDE_MASKED_LOAD10]], splat (float -0.000000e+00) -; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP30]], [[TMP31]]) -; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = select [[ACTIVE_LANE_MASK8]], [[WIDE_MASKED_LOAD11]], splat (float -0.000000e+00) -; CHECK-ORDERED-TF-NEXT: [[TMP34]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP32]], [[TMP33]]) +; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP35:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0 +; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 8 +; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP20]] +; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 16 +; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP23]] +; CHECK-ORDERED-TF-NEXT: 
[[TMP25:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 24 +; CHECK-ORDERED-TF-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP26]] +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP18]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP21]], i32 4, [[ACTIVE_LANE_MASK6]], poison) +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP24]], i32 4, [[ACTIVE_LANE_MASK7]], poison) +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP27]], i32 4, [[ACTIVE_LANE_MASK8]], poison) +; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = select [[ACTIVE_LANE_MASK]], [[WIDE_MASKED_LOAD]], splat (float -0.000000e+00) +; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], [[TMP28]]) +; CHECK-ORDERED-TF-NEXT: [[TMP30:%.*]] = select [[ACTIVE_LANE_MASK6]], [[WIDE_MASKED_LOAD9]], splat (float -0.000000e+00) +; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP29]], [[TMP30]]) +; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = select [[ACTIVE_LANE_MASK7]], [[WIDE_MASKED_LOAD10]], splat (float -0.000000e+00) +; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP31]], [[TMP32]]) +; CHECK-ORDERED-TF-NEXT: [[TMP34:%.*]] = select [[ACTIVE_LANE_MASK8]], [[WIDE_MASKED_LOAD11]], splat (float -0.000000e+00) +; CHECK-ORDERED-TF-NEXT: [[TMP35]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP33]], [[TMP34]]) ; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] -; CHECK-ORDERED-TF-NEXT: [[TMP35:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP36:%.*]] = mul i64 [[TMP35]], 8 -; CHECK-ORDERED-TF-NEXT: [[TMP37:%.*]] = add i64 [[INDEX]], [[TMP36]] -; CHECK-ORDERED-TF-NEXT: [[TMP38:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP39:%.*]] = mul i64 [[TMP38]], 16 -; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = add i64 [[INDEX]], [[TMP39]] -; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = mul i64 [[TMP41]], 24 -; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = add i64 [[INDEX]], [[TMP42]] +; CHECK-ORDERED-TF-NEXT: [[TMP36:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP37:%.*]] = mul i64 [[TMP36]], 8 +; CHECK-ORDERED-TF-NEXT: [[TMP38:%.*]] = add i64 [[INDEX]], [[TMP37]] +; CHECK-ORDERED-TF-NEXT: [[TMP39:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = mul i64 [[TMP39]], 16 +; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = add i64 [[INDEX]], [[TMP40]] +; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = mul i64 [[TMP42]], 24 +; CHECK-ORDERED-TF-NEXT: [[TMP44:%.*]] = add i64 [[INDEX]], [[TMP43]] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP9]]) -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT12]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP37]], i64 [[TMP9]]) -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT13]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP40]], i64 [[TMP9]]) -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT14]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP43]], i64 [[TMP9]]) -; CHECK-ORDERED-TF-NEXT: 
[[TMP44:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = extractelement [[TMP44]], i32 0 -; CHECK-ORDERED-TF-NEXT: br i1 [[TMP45]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT12]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP38]], i64 [[TMP9]]) +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT13]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP41]], i64 [[TMP9]]) +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT14]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP44]], i64 [[TMP9]]) +; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = extractelement [[TMP45]], i32 0 +; CHECK-ORDERED-TF-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]] ; CHECK-ORDERED-TF: scalar.ph: @@ -429,13 +429,13 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-ORDERED-TF-NEXT: [[ADD]] = fadd float [[TMP46]], [[SUM_07]] +; CHECK-ORDERED-TF-NEXT: [[TMP47:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-ORDERED-TF-NEXT: [[ADD]] = fadd float [[TMP47]], [[SUM_07]] ; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK-ORDERED-TF: for.end: -; CHECK-ORDERED-TF-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP34]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-TF-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP35]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-TF-NEXT: ret float [[ADD_LCSSA]] ; @@ -643,34 +643,34 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 -; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 -; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = sub i64 [[TMP2]], [[TMP9]] -; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP2]], [[TMP9]] -; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i64 [[TMP10]], i64 0 +; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 +; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = sub i64 [[TMP2]], [[TMP10]] +; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = icmp ugt i64 [[TMP2]], [[TMP10]] +; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i64 [[TMP11]], i64 0 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[TMP2]]) ; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_BODY:%.*]] ; 
CHECK-ORDERED-TF: vector.body: ; CHECK-ORDERED-TF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ [[A2]], [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[VEC_PHI1:%.*]] = phi float [ [[A1]], [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ [[A2]], [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-TF-NEXT: [[VEC_PHI1:%.*]] = phi float [ [[A1]], [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[OFFSET_IDX]] +; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-ORDERED-TF-NEXT: [[INTERLEAVED_MASK:%.*]] = call @llvm.vector.interleave2.nxv8i1( [[ACTIVE_LANE_MASK]], [[ACTIVE_LANE_MASK]]) -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_VEC:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP13]], i32 4, [[INTERLEAVED_MASK]], poison) +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_VEC:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP14]], i32 4, [[INTERLEAVED_MASK]], poison) ; CHECK-ORDERED-TF-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.vector.deinterleave2.nxv8f32( [[WIDE_MASKED_VEC]]) -; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 -; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 -; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP15]], splat (float -0.000000e+00) -; CHECK-ORDERED-TF-NEXT: [[TMP17]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP16]]) -; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP14]], splat (float -0.000000e+00) -; CHECK-ORDERED-TF-NEXT: [[TMP19]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI1]], [[TMP18]]) +; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 +; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 +; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP16]], splat (float -0.000000e+00) +; CHECK-ORDERED-TF-NEXT: [[TMP18]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP17]]) +; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP15]], splat (float -0.000000e+00) +; CHECK-ORDERED-TF-NEXT: [[TMP20]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI1]], [[TMP19]]) ; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]] -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP12]]) -; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = extractelement [[TMP20]], i32 0 -; CHECK-ORDERED-TF-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP13]]) +; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = extractelement [[TMP21]], i32 0 +; 
CHECK-ORDERED-TF-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]] ; CHECK-ORDERED-TF: scalar.ph: @@ -683,18 +683,18 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-ORDERED-TF-NEXT: [[ADD_PHI2:%.*]] = phi float [ [[BC_MERGE_RDX2]], [[SCALAR_PH]] ], [ [[ADD1:%.*]], [[FOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[ARRAYIDXB1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]] -; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDXB1]], align 4 -; CHECK-ORDERED-TF-NEXT: [[ADD1]] = fadd float [[TMP22]], [[ADD_PHI2]] +; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDXB1]], align 4 +; CHECK-ORDERED-TF-NEXT: [[ADD1]] = fadd float [[TMP23]], [[ADD_PHI2]] ; CHECK-ORDERED-TF-NEXT: [[OR:%.*]] = or disjoint i64 [[IV]], 1 ; CHECK-ORDERED-TF-NEXT: [[ARRAYIDXB2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[OR]] -; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDXB2]], align 4 -; CHECK-ORDERED-TF-NEXT: [[ADD2]] = fadd float [[TMP23]], [[ADD_PHI1]] +; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDXB2]], align 4 +; CHECK-ORDERED-TF-NEXT: [[ADD2]] = fadd float [[TMP24]], [[ADD_PHI1]] ; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 2 ; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK-ORDERED-TF: for.end: -; CHECK-ORDERED-TF-NEXT: [[ADD1_LCSSA:%.*]] = phi float [ [[ADD1]], [[FOR_BODY]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ] -; CHECK-ORDERED-TF-NEXT: [[ADD2_LCSSA:%.*]] = phi float [ [[ADD2]], [[FOR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-TF-NEXT: [[ADD1_LCSSA:%.*]] = phi float [ [[ADD1]], [[FOR_BODY]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-TF-NEXT: [[ADD2_LCSSA:%.*]] = phi float [ [[ADD2]], [[FOR_BODY]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-TF-NEXT: store float [[ADD1_LCSSA]], ptr [[A]], align 4 ; CHECK-ORDERED-TF-NEXT: store float [[ADD2_LCSSA]], ptr [[ARRAYIDXA]], align 4 ; CHECK-ORDERED-TF-NEXT: ret void @@ -1396,71 +1396,71 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UNORDERED: vector.body: ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP31:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 0 -; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 
8 -; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP9]] -; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 16 -; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP12]] -; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 24 -; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP15]] -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP7]], align 4 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP10]], align 4 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load , ptr [[TMP13]], align 4 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load , ptr [[TMP16]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0 -; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 8 -; CHECK-UNORDERED-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP20]] -; CHECK-UNORDERED-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 16 -; CHECK-UNORDERED-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP23]] -; CHECK-UNORDERED-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 24 -; CHECK-UNORDERED-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP26]] -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load , ptr [[TMP18]], align 4 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD8:%.*]] = load , ptr [[TMP21]], align 4 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD9:%.*]] = load , ptr [[TMP24]], align 4 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD10:%.*]] = load , ptr [[TMP27]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP28]] = call @llvm.fmuladd.nxv8f32( [[WIDE_LOAD]], [[WIDE_LOAD7]], [[VEC_PHI]]) -; CHECK-UNORDERED-NEXT: [[TMP29]] = call @llvm.fmuladd.nxv8f32( [[WIDE_LOAD4]], [[WIDE_LOAD8]], [[VEC_PHI1]]) -; CHECK-UNORDERED-NEXT: [[TMP30]] = call @llvm.fmuladd.nxv8f32( [[WIDE_LOAD5]], [[WIDE_LOAD9]], [[VEC_PHI2]]) -; CHECK-UNORDERED-NEXT: [[TMP31]] = call @llvm.fmuladd.nxv8f32( [[WIDE_LOAD6]], [[WIDE_LOAD10]], [[VEC_PHI3]]) +; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP31:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP32:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 +; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8 +; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP10]] +; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; 
CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16
+; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP13]]
+; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 24
+; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP16]]
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP8]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP11]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP14]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP17]], align 4
+; CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
+; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 0
+; CHECK-UNORDERED-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-UNORDERED-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 8
+; CHECK-UNORDERED-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP21]]
+; CHECK-UNORDERED-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-UNORDERED-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 16
+; CHECK-UNORDERED-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP24]]
+; CHECK-UNORDERED-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-UNORDERED-NEXT: [[TMP27:%.*]] = mul i64 [[TMP26]], 24
+; CHECK-UNORDERED-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP27]]
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, ptr [[TMP19]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 8 x float>, ptr [[TMP22]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD9:%.*]] = load <vscale x 8 x float>, ptr [[TMP25]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD10:%.*]] = load <vscale x 8 x float>, ptr [[TMP28]], align 4
+; CHECK-UNORDERED-NEXT: [[TMP29]] = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD]], <vscale x 8 x float> [[WIDE_LOAD7]], <vscale x 8 x float> [[VEC_PHI]])
+; CHECK-UNORDERED-NEXT: [[TMP30]] = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD4]], <vscale x 8 x float> [[WIDE_LOAD8]], <vscale x 8 x float> [[VEC_PHI1]])
+; CHECK-UNORDERED-NEXT: [[TMP31]] = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD5]], <vscale x 8 x float> [[WIDE_LOAD9]], <vscale x 8 x float> [[VEC_PHI2]])
+; CHECK-UNORDERED-NEXT: [[TMP32]] = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD6]], <vscale x 8 x float> [[WIDE_LOAD10]], <vscale x 8 x float> [[VEC_PHI3]])
 ; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; CHECK-UNORDERED-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-UNORDERED-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK-UNORDERED-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-UNORDERED-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK-UNORDERED: middle.block:
-; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd <vscale x 8 x float> [[TMP29]], [[TMP28]]
-; CHECK-UNORDERED-NEXT: [[BIN_RDX11:%.*]] = fadd <vscale x 8 x float> [[TMP30]], [[BIN_RDX]]
-; CHECK-UNORDERED-NEXT: [[BIN_RDX12:%.*]] = fadd <vscale x 8 x float> [[TMP31]], [[BIN_RDX11]]
-; CHECK-UNORDERED-NEXT: [[TMP33:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, <vscale x 8 x float> [[BIN_RDX12]])
+; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd <vscale x 8 x float> [[TMP30]], [[TMP29]]
+; CHECK-UNORDERED-NEXT: [[BIN_RDX11:%.*]] = fadd <vscale x 8 x float> [[TMP31]], [[BIN_RDX]]
+; CHECK-UNORDERED-NEXT: [[BIN_RDX12:%.*]] = fadd <vscale x 8 x float> [[TMP32]], [[BIN_RDX11]]
+; CHECK-UNORDERED-NEXT: [[TMP34:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, <vscale x 8 x float> [[BIN_RDX12]])
 ; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK-UNORDERED: scalar.ph:
 ; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP33]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
+; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP34]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
 ; CHECK-UNORDERED-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK-UNORDERED: for.body:
 ; CHECK-UNORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-UNORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ]
 ; CHECK-UNORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
-; CHECK-UNORDERED-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-UNORDERED-NEXT: [[TMP35:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-UNORDERED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
-; CHECK-UNORDERED-NEXT: [[TMP35:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
-; CHECK-UNORDERED-NEXT: [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP34]], float [[TMP35]], float [[SUM_07]])
+; CHECK-UNORDERED-NEXT: [[TMP36:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; CHECK-UNORDERED-NEXT: [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP35]], float [[TMP36]], float [[SUM_07]])
 ; CHECK-UNORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-UNORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
 ; CHECK-UNORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
 ; CHECK-UNORDERED: for.end:
-; CHECK-UNORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ]
+; CHECK-UNORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP34]], [[MIDDLE_BLOCK]] ]
 ; CHECK-UNORDERED-NEXT: ret float [[MULADD_LCSSA]]
 ;
 ; CHECK-ORDERED-LABEL: define float @fmuladd_strict
@@ -1480,68 +1480,68 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK-ORDERED: vector.body:
 ; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP35:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 0
-; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8
-; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP9]]
-; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 16
-; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP12]]
-; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 24
-; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP15]]
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP7]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x float>, ptr [[TMP10]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x float>, ptr [[TMP13]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x float>, ptr [[TMP16]], align 4
-; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-ORDERED-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
-; CHECK-ORDERED-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 8
-; CHECK-ORDERED-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP20]]
-; CHECK-ORDERED-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 16
-; CHECK-ORDERED-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP23]]
-; CHECK-ORDERED-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 24
-; CHECK-ORDERED-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP26]]
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP18]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP21]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP24]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, ptr [[TMP27]], align 4
-; CHECK-ORDERED-NEXT: [[TMP28:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD]], [[WIDE_LOAD4]]
-; CHECK-ORDERED-NEXT: [[TMP29:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD1]], [[WIDE_LOAD5]]
-; CHECK-ORDERED-NEXT: [[TMP30:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD2]], [[WIDE_LOAD6]]
-; CHECK-ORDERED-NEXT: [[TMP31:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD3]], [[WIDE_LOAD7]]
-; CHECK-ORDERED-NEXT: [[TMP32:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP28]])
-; CHECK-ORDERED-NEXT: [[TMP33:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP32]], <vscale x 8 x float> [[TMP29]])
+; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP36:%.*]], [[VECTOR_BODY]] ]
+; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0
+; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8
+; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP10]]
+; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16
+; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP13]]
+; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 24
+; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP16]]
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP8]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x float>, ptr [[TMP11]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x float>, ptr [[TMP14]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x float>, ptr [[TMP17]], align 4
+; CHECK-ORDERED-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
+; CHECK-ORDERED-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 0
+; CHECK-ORDERED-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 8
+; CHECK-ORDERED-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP21]]
+; CHECK-ORDERED-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 16
+; CHECK-ORDERED-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP24]]
+; CHECK-ORDERED-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-NEXT: [[TMP27:%.*]] = mul i64 [[TMP26]], 24
+; CHECK-ORDERED-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP27]]
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP19]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP22]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP25]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, ptr [[TMP28]], align 4
+; CHECK-ORDERED-NEXT: [[TMP29:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD]], [[WIDE_LOAD4]]
+; CHECK-ORDERED-NEXT: [[TMP30:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD1]], [[WIDE_LOAD5]]
+; CHECK-ORDERED-NEXT: [[TMP31:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD2]], [[WIDE_LOAD6]]
+; CHECK-ORDERED-NEXT: [[TMP32:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD3]], [[WIDE_LOAD7]]
+; CHECK-ORDERED-NEXT: [[TMP33:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP29]])
 ; CHECK-ORDERED-NEXT: [[TMP34:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP33]], <vscale x 8 x float> [[TMP30]])
-; CHECK-ORDERED-NEXT: [[TMP35]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP34]], <vscale x 8 x float> [[TMP31]])
+; CHECK-ORDERED-NEXT: [[TMP35:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP34]], <vscale x 8 x float> [[TMP31]])
+; CHECK-ORDERED-NEXT: [[TMP36]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP35]], <vscale x 8 x float> [[TMP32]])
 ; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; CHECK-ORDERED-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-ORDERED-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK-ORDERED-NEXT: [[TMP37:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-ORDERED-NEXT: br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
 ; CHECK-ORDERED: middle.block:
 ; CHECK-ORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-ORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK-ORDERED: scalar.ph:
 ; CHECK-ORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP35]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
+; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP36]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
 ; CHECK-ORDERED-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK-ORDERED: for.body:
 ; CHECK-ORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-ORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ]
 ; CHECK-ORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
-; CHECK-ORDERED-NEXT: [[TMP37:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-ORDERED-NEXT: [[TMP38:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-ORDERED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
-; CHECK-ORDERED-NEXT: [[TMP38:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
-; CHECK-ORDERED-NEXT: [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP37]], float [[TMP38]], float [[SUM_07]])
+; CHECK-ORDERED-NEXT: [[TMP39:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; CHECK-ORDERED-NEXT: [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP38]], float [[TMP39]], float [[SUM_07]])
 ; CHECK-ORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-ORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
 ; CHECK-ORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
 ; CHECK-ORDERED: for.end:
-; CHECK-ORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP35]], [[MIDDLE_BLOCK]] ]
+; CHECK-ORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP36]], [[MIDDLE_BLOCK]] ]
 ; CHECK-ORDERED-NEXT: ret float [[MULADD_LCSSA]]
 ;
 ; CHECK-ORDERED-TF-LABEL: define float @fmuladd_strict
@@ -1582,66 +1582,66 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK6:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY3]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT16:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK7:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY4]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT17:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT18:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP49:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i32 0
-; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 8
-; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP19]]
-; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 16
-; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP22]]
-; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = mul i64 [[TMP24]], 24
-; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP25]]
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP17]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP20]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP23]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP26]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i32 0
-; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP30:%.*]] = mul i64 [[TMP29]], 8
-; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i64 [[TMP30]]
-; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = mul i64 [[TMP32]], 16
-; CHECK-ORDERED-TF-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i64 [[TMP33]]
-; CHECK-ORDERED-TF-NEXT: [[TMP35:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP36:%.*]] = mul i64 [[TMP35]], 24
-; CHECK-ORDERED-TF-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i64 [[TMP36]]
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP28]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP31]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP34]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP37]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[TMP38:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD12]]
-; CHECK-ORDERED-TF-NEXT: [[TMP39:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD13]]
-; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD10]], [[WIDE_MASKED_LOAD14]]
-; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD11]], [[WIDE_MASKED_LOAD15]]
-; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[TMP38]], <vscale x 8 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP42]])
-; CHECK-ORDERED-TF-NEXT: [[TMP44:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> [[TMP39]], <vscale x 8 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP43]], <vscale x 8 x float> [[TMP44]])
-; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> [[TMP40]], <vscale x 8 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP47:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP45]], <vscale x 8 x float> [[TMP46]])
-; CHECK-ORDERED-TF-NEXT: [[TMP48:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> [[TMP41]], <vscale x 8 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP49]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP47]], <vscale x 8 x float> [[TMP48]])
+; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP50:%.*]], [[VECTOR_BODY]] ]
+; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
+; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 8
+; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP20]]
+; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 16
+; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP23]]
+; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 24
+; CHECK-ORDERED-TF-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP26]]
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP18]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP21]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP24]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP27]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
+; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i32 0
+; CHECK-ORDERED-TF-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = mul i64 [[TMP30]], 8
+; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[TMP31]]
+; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP34:%.*]] = mul i64 [[TMP33]], 16
+; CHECK-ORDERED-TF-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[TMP34]]
+; CHECK-ORDERED-TF-NEXT: [[TMP36:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP37:%.*]] = mul i64 [[TMP36]], 24
+; CHECK-ORDERED-TF-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[TMP37]]
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP29]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP32]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP35]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP38]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[TMP39:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD12]]
+; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD13]]
+; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD10]], [[WIDE_MASKED_LOAD14]]
+; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD11]], [[WIDE_MASKED_LOAD15]]
+; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[TMP39]], <vscale x 8 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP44:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP43]])
+; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> [[TMP40]], <vscale x 8 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP44]], <vscale x 8 x float> [[TMP45]])
+; CHECK-ORDERED-TF-NEXT: [[TMP47:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> [[TMP41]], <vscale x 8 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP48:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP46]], <vscale x 8 x float> [[TMP47]])
+; CHECK-ORDERED-TF-NEXT: [[TMP49:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> [[TMP42]], <vscale x 8 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP50]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP48]], <vscale x 8 x float> [[TMP49]])
 ; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
-; CHECK-ORDERED-TF-NEXT: [[TMP50:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP51:%.*]] = mul i64 [[TMP50]], 8
-; CHECK-ORDERED-TF-NEXT: [[TMP52:%.*]] = add i64 [[INDEX]], [[TMP51]]
-; CHECK-ORDERED-TF-NEXT: [[TMP53:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP54:%.*]] = mul i64 [[TMP53]], 16
-; CHECK-ORDERED-TF-NEXT: [[TMP55:%.*]] = add i64 [[INDEX]], [[TMP54]]
-; CHECK-ORDERED-TF-NEXT: [[TMP56:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP57:%.*]] = mul i64 [[TMP56]], 24
-; CHECK-ORDERED-TF-NEXT: [[TMP58:%.*]] = add i64 [[INDEX]], [[TMP57]]
+; CHECK-ORDERED-TF-NEXT: [[TMP51:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP52:%.*]] = mul i64 [[TMP51]], 8
+; CHECK-ORDERED-TF-NEXT: [[TMP53:%.*]] = add i64 [[INDEX]], [[TMP52]]
+; CHECK-ORDERED-TF-NEXT: [[TMP54:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP55:%.*]] = mul i64 [[TMP54]], 16
+; CHECK-ORDERED-TF-NEXT: [[TMP56:%.*]] = add i64 [[INDEX]], [[TMP55]]
+; CHECK-ORDERED-TF-NEXT: [[TMP57:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP58:%.*]] = mul i64 [[TMP57]], 24
+; CHECK-ORDERED-TF-NEXT: [[TMP59:%.*]] = add i64 [[INDEX]], [[TMP58]]
 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP9]])
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP52]], i64 [[TMP9]])
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT17]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP55]], i64 [[TMP9]])
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT18]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP58]], i64 [[TMP9]])
-; CHECK-ORDERED-TF-NEXT: [[TMP59:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
-; CHECK-ORDERED-TF-NEXT: [[TMP60:%.*]] = extractelement <vscale x 8 x i1> [[TMP59]], i32 0
-; CHECK-ORDERED-TF-NEXT: br i1 [[TMP60]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP53]], i64 [[TMP9]])
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT17]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP56]], i64 [[TMP9]])
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT18]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP59]], i64 [[TMP9]])
+; CHECK-ORDERED-TF-NEXT: [[TMP60:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
+; CHECK-ORDERED-TF-NEXT: [[TMP61:%.*]] = extractelement <vscale x 8 x i1> [[TMP60]], i32 0
+; CHECK-ORDERED-TF-NEXT: br i1 [[TMP61]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
 ; CHECK-ORDERED-TF: middle.block:
 ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]]
 ; CHECK-ORDERED-TF: scalar.ph:
@@ -1652,15 +1652,15 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-ORDERED-TF-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ]
 ; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
-; CHECK-ORDERED-TF-NEXT: [[TMP61:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-ORDERED-TF-NEXT: [[TMP62:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
-; CHECK-ORDERED-TF-NEXT: [[TMP62:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
-; CHECK-ORDERED-TF-NEXT: [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP61]], float [[TMP62]], float [[SUM_07]])
+; CHECK-ORDERED-TF-NEXT: [[TMP63:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; CHECK-ORDERED-TF-NEXT: [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP62]], float [[TMP63]], float [[SUM_07]])
 ; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
 ; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
 ; CHECK-ORDERED-TF: for.end:
-; CHECK-ORDERED-TF-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP49]], [[MIDDLE_BLOCK]] ]
+; CHECK-ORDERED-TF-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP50]], [[MIDDLE_BLOCK]] ]
 ; CHECK-ORDERED-TF-NEXT: ret float [[MULADD_LCSSA]]
 ;
@@ -1724,71 +1724,71 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK-UNORDERED: vector.body:
 ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[VECTOR_BODY]] ]
-; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[VECTOR_BODY]] ]
-; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[VECTOR_BODY]] ]
-; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP31:%.*]], [[VECTOR_BODY]] ]
-; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 0
-; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8
-; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP9]]
-; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 16
-; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP12]]
-; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 24
-; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP15]]
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP7]], align 4
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP10]], align 4
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP13]], align 4
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP16]], align 4
-; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
-; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 8
-; CHECK-UNORDERED-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP20]]
-; CHECK-UNORDERED-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 16
-; CHECK-UNORDERED-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP23]]
-; CHECK-UNORDERED-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 24
-; CHECK-UNORDERED-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP26]]
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, ptr [[TMP18]], align 4
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 8 x float>, ptr [[TMP21]], align 4
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD9:%.*]] = load <vscale x 8 x float>, ptr [[TMP24]], align 4
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD10:%.*]] = load <vscale x 8 x float>, ptr [[TMP27]], align 4
-; CHECK-UNORDERED-NEXT: [[TMP28]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD]], <vscale x 8 x float> [[WIDE_LOAD7]], <vscale x 8 x float> [[VEC_PHI]])
-; CHECK-UNORDERED-NEXT: [[TMP29]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD4]], <vscale x 8 x float> [[WIDE_LOAD8]], <vscale x 8 x float> [[VEC_PHI1]])
-; CHECK-UNORDERED-NEXT: [[TMP30]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD5]], <vscale x 8 x float> [[WIDE_LOAD9]], <vscale x 8 x float> [[VEC_PHI2]])
-; CHECK-UNORDERED-NEXT: [[TMP31]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD6]], <vscale x 8 x float> [[WIDE_LOAD10]], <vscale x 8 x float> [[VEC_PHI3]])
+; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[VECTOR_BODY]] ]
+; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[VECTOR_BODY]] ]
+; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP31:%.*]], [[VECTOR_BODY]] ]
+; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP32:%.*]], [[VECTOR_BODY]] ]
+; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0
+; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8
+; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP10]]
+; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16
+; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP13]]
+; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 24
+; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP16]]
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP8]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP11]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP14]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP17]], align 4
+; CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
+; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 0
+; CHECK-UNORDERED-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-UNORDERED-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 8
+; CHECK-UNORDERED-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP21]]
+; CHECK-UNORDERED-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-UNORDERED-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 16
+; CHECK-UNORDERED-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP24]]
+; CHECK-UNORDERED-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-UNORDERED-NEXT: [[TMP27:%.*]] = mul i64 [[TMP26]], 24
+; CHECK-UNORDERED-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP27]]
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, ptr [[TMP19]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 8 x float>, ptr [[TMP22]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD9:%.*]] = load <vscale x 8 x float>, ptr [[TMP25]], align 4
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD10:%.*]] = load <vscale x 8 x float>, ptr [[TMP28]], align 4
+; CHECK-UNORDERED-NEXT: [[TMP29]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD]], <vscale x 8 x float> [[WIDE_LOAD7]], <vscale x 8 x float> [[VEC_PHI]])
+; CHECK-UNORDERED-NEXT: [[TMP30]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD4]], <vscale x 8 x float> [[WIDE_LOAD8]], <vscale x 8 x float> [[VEC_PHI1]])
+; CHECK-UNORDERED-NEXT: [[TMP31]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD5]], <vscale x 8 x float> [[WIDE_LOAD9]], <vscale x 8 x float> [[VEC_PHI2]])
+; CHECK-UNORDERED-NEXT: [[TMP32]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD6]], <vscale x 8 x float> [[WIDE_LOAD10]], <vscale x 8 x float> [[VEC_PHI3]])
 ; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; CHECK-UNORDERED-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-UNORDERED-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK-UNORDERED-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-UNORDERED-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
 ; CHECK-UNORDERED: middle.block:
-; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd nnan <vscale x 8 x float> [[TMP29]], [[TMP28]]
-; CHECK-UNORDERED-NEXT: [[BIN_RDX11:%.*]] = fadd nnan <vscale x 8 x float> [[TMP30]], [[BIN_RDX]]
-; CHECK-UNORDERED-NEXT: [[BIN_RDX12:%.*]] = fadd nnan <vscale x 8 x float> [[TMP31]], [[BIN_RDX11]]
-; CHECK-UNORDERED-NEXT: [[TMP33:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, <vscale x 8 x float> [[BIN_RDX12]])
+; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd nnan <vscale x 8 x float> [[TMP30]], [[TMP29]]
+; CHECK-UNORDERED-NEXT: [[BIN_RDX11:%.*]] = fadd nnan <vscale x 8 x float> [[TMP31]], [[BIN_RDX]]
+; CHECK-UNORDERED-NEXT: [[BIN_RDX12:%.*]] = fadd nnan <vscale x 8 x float> [[TMP32]], [[BIN_RDX11]]
+; CHECK-UNORDERED-NEXT: [[TMP34:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, <vscale x 8 x float> [[BIN_RDX12]])
 ; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK-UNORDERED: scalar.ph:
 ; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP33]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
+; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP34]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
 ; CHECK-UNORDERED-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK-UNORDERED: for.body:
 ; CHECK-UNORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-UNORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ]
 ; CHECK-UNORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
-; CHECK-UNORDERED-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-UNORDERED-NEXT: [[TMP35:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-UNORDERED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
-; CHECK-UNORDERED-NEXT: [[TMP35:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
-; CHECK-UNORDERED-NEXT: [[MULADD]] = tail call nnan float @llvm.fmuladd.f32(float [[TMP34]], float [[TMP35]], float [[SUM_07]])
+; CHECK-UNORDERED-NEXT: [[TMP36:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; CHECK-UNORDERED-NEXT: [[MULADD]] = tail call nnan float @llvm.fmuladd.f32(float [[TMP35]], float [[TMP36]], float [[SUM_07]])
 ; CHECK-UNORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-UNORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
 ; CHECK-UNORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
 ; CHECK-UNORDERED: for.end:
-; CHECK-UNORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ]
+; CHECK-UNORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP34]], [[MIDDLE_BLOCK]] ]
 ; CHECK-UNORDERED-NEXT: ret float [[MULADD_LCSSA]]
 ;
 ; CHECK-ORDERED-LABEL: define float @fmuladd_strict_fmf
@@ -1808,68 +1808,68 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK-ORDERED: vector.body:
 ; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP35:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 0
-; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8
-; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP9]]
-; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 16
-; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP12]]
-; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 24
-; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP15]]
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP7]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x float>, ptr [[TMP10]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x float>, ptr [[TMP13]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x float>, ptr [[TMP16]], align 4
-; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-ORDERED-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
-; CHECK-ORDERED-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 8
-; CHECK-ORDERED-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP20]]
-; CHECK-ORDERED-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 16
-; CHECK-ORDERED-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP23]]
-; CHECK-ORDERED-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 24
-; CHECK-ORDERED-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP26]]
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP18]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP21]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP24]], align 4
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, ptr [[TMP27]], align 4
-; CHECK-ORDERED-NEXT: [[TMP28:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD]], [[WIDE_LOAD4]]
-; CHECK-ORDERED-NEXT: [[TMP29:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD1]], [[WIDE_LOAD5]]
-; CHECK-ORDERED-NEXT: [[TMP30:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD2]], [[WIDE_LOAD6]]
-; CHECK-ORDERED-NEXT: [[TMP31:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD3]], [[WIDE_LOAD7]]
-; CHECK-ORDERED-NEXT: [[TMP32:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP28]])
-; CHECK-ORDERED-NEXT: [[TMP33:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP32]], <vscale x 8 x float> [[TMP29]])
+; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP36:%.*]], [[VECTOR_BODY]] ]
+; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0
+; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8
+; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP10]]
+; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16
+; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP13]]
+; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 24
+; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP16]]
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP8]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x float>, ptr [[TMP11]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x float>, ptr [[TMP14]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x float>, ptr [[TMP17]], align 4
+; CHECK-ORDERED-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
+; CHECK-ORDERED-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 0
+; CHECK-ORDERED-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 8
+; CHECK-ORDERED-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP21]]
+; CHECK-ORDERED-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 16
+; CHECK-ORDERED-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP24]]
+; CHECK-ORDERED-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-NEXT: [[TMP27:%.*]] = mul i64 [[TMP26]], 24
+; CHECK-ORDERED-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP27]]
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP19]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP22]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP25]], align 4
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, ptr [[TMP28]], align 4
+; CHECK-ORDERED-NEXT: [[TMP29:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD]], [[WIDE_LOAD4]]
+; CHECK-ORDERED-NEXT: [[TMP30:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD1]], [[WIDE_LOAD5]]
+; CHECK-ORDERED-NEXT: [[TMP31:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD2]], [[WIDE_LOAD6]]
+; CHECK-ORDERED-NEXT: [[TMP32:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD3]], [[WIDE_LOAD7]]
+; CHECK-ORDERED-NEXT: [[TMP33:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP29]])
 ; CHECK-ORDERED-NEXT: [[TMP34:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP33]], <vscale x 8 x float> [[TMP30]])
-; CHECK-ORDERED-NEXT: [[TMP35]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP34]], <vscale x 8 x float> [[TMP31]])
+; CHECK-ORDERED-NEXT: [[TMP35:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP34]], <vscale x 8 x float> [[TMP31]])
+; CHECK-ORDERED-NEXT: [[TMP36]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP35]], <vscale x 8 x float> [[TMP32]])
 ; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; CHECK-ORDERED-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-ORDERED-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
+; CHECK-ORDERED-NEXT: [[TMP37:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-ORDERED-NEXT: br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
 ; CHECK-ORDERED: middle.block:
 ; CHECK-ORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-ORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK-ORDERED: scalar.ph:
 ; CHECK-ORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP35]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
+; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP36]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
 ; CHECK-ORDERED-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK-ORDERED: for.body:
 ; CHECK-ORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-ORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ]
 ; CHECK-ORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
-; CHECK-ORDERED-NEXT: [[TMP37:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-ORDERED-NEXT: [[TMP38:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-ORDERED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
-; CHECK-ORDERED-NEXT: [[TMP38:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
-; CHECK-ORDERED-NEXT: [[MULADD]] = tail call nnan float @llvm.fmuladd.f32(float [[TMP37]], float [[TMP38]], float [[SUM_07]])
+; CHECK-ORDERED-NEXT: [[TMP39:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; CHECK-ORDERED-NEXT: [[MULADD]] = tail call nnan float @llvm.fmuladd.f32(float [[TMP38]], float [[TMP39]], float [[SUM_07]])
 ; CHECK-ORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-ORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
 ; CHECK-ORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
 ; CHECK-ORDERED: for.end:
-; CHECK-ORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP35]], [[MIDDLE_BLOCK]] ]
+; CHECK-ORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP36]], [[MIDDLE_BLOCK]] ]
 ; CHECK-ORDERED-NEXT: ret float [[MULADD_LCSSA]]
 ;
 ; CHECK-ORDERED-TF-LABEL: define float @fmuladd_strict_fmf
@@ -1910,66 +1910,66 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK6:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY3]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT16:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK7:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY4]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT17:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT18:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP49:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i32 0
-; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 8
-; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP19]]
-; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 16
-; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP22]]
-; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = mul i64 [[TMP24]], 24
-; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP25]]
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP17]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP20]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP23]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP26]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i32 0
-; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP30:%.*]] = mul i64 [[TMP29]], 8
-; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i64 [[TMP30]]
-; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = mul i64 [[TMP32]], 16
-; CHECK-ORDERED-TF-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i64 [[TMP33]]
-; CHECK-ORDERED-TF-NEXT: [[TMP35:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP36:%.*]] = mul i64 [[TMP35]], 24
-; CHECK-ORDERED-TF-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i64 [[TMP36]]
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP28]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP31]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP34]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP37]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[TMP38:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD12]]
-; CHECK-ORDERED-TF-NEXT: [[TMP39:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD13]]
-; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD10]], [[WIDE_MASKED_LOAD14]]
-; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD11]], [[WIDE_MASKED_LOAD15]]
-; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[TMP38]], <vscale x 8 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP42]])
-; CHECK-ORDERED-TF-NEXT: [[TMP44:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> [[TMP39]], <vscale x 8 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP43]], <vscale x 8 x float> [[TMP44]])
-; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> [[TMP40]], <vscale x 8 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP47:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP45]], <vscale x 8 x float> [[TMP46]])
-; CHECK-ORDERED-TF-NEXT: [[TMP48:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> [[TMP41]], <vscale x 8 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP49]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP47]], <vscale x 8 x float> [[TMP48]])
+; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP50:%.*]], [[VECTOR_BODY]] ]
+; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
+; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 8
+; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP20]]
+; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 16
+; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP23]]
+; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 24
+; CHECK-ORDERED-TF-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP26]]
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP18]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP21]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP24]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP27]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
+; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i32 0
+; CHECK-ORDERED-TF-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = mul i64 [[TMP30]], 8
+; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[TMP31]]
+; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP34:%.*]] = mul i64 [[TMP33]], 16
+; CHECK-ORDERED-TF-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[TMP34]]
+; CHECK-ORDERED-TF-NEXT: [[TMP36:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP37:%.*]] = mul i64 [[TMP36]], 24
+; CHECK-ORDERED-TF-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[TMP37]]
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP29]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP32]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP35]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP38]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[TMP39:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD12]]
+; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD13]]
+; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD10]], [[WIDE_MASKED_LOAD14]]
+; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD11]], [[WIDE_MASKED_LOAD15]]
+; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[TMP39]], <vscale x 8 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP44:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP43]])
+; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> [[TMP40]], <vscale x 8 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP44]], <vscale x 8 x float> [[TMP45]])
+; CHECK-ORDERED-TF-NEXT: [[TMP47:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> [[TMP41]], <vscale x 8 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP48:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP46]], <vscale x 8 x float> [[TMP47]])
+; CHECK-ORDERED-TF-NEXT: [[TMP49:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> [[TMP42]], <vscale x 8 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP50]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP48]], <vscale x 8 x float> [[TMP49]])
 ; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
-; CHECK-ORDERED-TF-NEXT: [[TMP50:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP51:%.*]] = mul i64 [[TMP50]], 8
-; CHECK-ORDERED-TF-NEXT: [[TMP52:%.*]] = add i64 [[INDEX]], [[TMP51]]
-; CHECK-ORDERED-TF-NEXT: [[TMP53:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP54:%.*]] = mul i64 [[TMP53]], 16
-; CHECK-ORDERED-TF-NEXT: [[TMP55:%.*]] = add i64 [[INDEX]], [[TMP54]]
-; CHECK-ORDERED-TF-NEXT: [[TMP56:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP57:%.*]] = mul i64 [[TMP56]], 24
-; CHECK-ORDERED-TF-NEXT: [[TMP58:%.*]] = add i64 [[INDEX]], [[TMP57]]
+; CHECK-ORDERED-TF-NEXT: [[TMP51:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP52:%.*]] = mul i64 [[TMP51]], 8
+; CHECK-ORDERED-TF-NEXT: [[TMP53:%.*]] = add i64 [[INDEX]], [[TMP52]]
+; CHECK-ORDERED-TF-NEXT: [[TMP54:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP55:%.*]] = mul i64 [[TMP54]], 16
+; CHECK-ORDERED-TF-NEXT: [[TMP56:%.*]] = add i64 [[INDEX]], [[TMP55]]
+; CHECK-ORDERED-TF-NEXT: [[TMP57:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP58:%.*]] = mul i64 [[TMP57]], 24
+; CHECK-ORDERED-TF-NEXT: [[TMP59:%.*]] = add i64 [[INDEX]], [[TMP58]]
 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP9]])
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP52]], i64 [[TMP9]])
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT17]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP55]], i64 [[TMP9]])
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT18]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP58]], i64 [[TMP9]])
-; CHECK-ORDERED-TF-NEXT: [[TMP59:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
-; CHECK-ORDERED-TF-NEXT: [[TMP60:%.*]] = extractelement <vscale x 8 x i1> [[TMP59]], i32 0
-; CHECK-ORDERED-TF-NEXT: br i1 [[TMP60]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP53]], i64 [[TMP9]])
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT17]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP56]], i64 [[TMP9]])
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT18]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP59]], i64 [[TMP9]])
+; CHECK-ORDERED-TF-NEXT: [[TMP60:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
+; CHECK-ORDERED-TF-NEXT: [[TMP61:%.*]] = extractelement <vscale x 8 x i1> [[TMP60]], i32 0
+; CHECK-ORDERED-TF-NEXT: br i1 [[TMP61]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
 ; CHECK-ORDERED-TF: middle.block:
 ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]]
 ; CHECK-ORDERED-TF: scalar.ph:
@@ -1980,15 +1980,15 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
 ; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-ORDERED-TF-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ]
 ; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
-; CHECK-ORDERED-TF-NEXT: [[TMP61:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-ORDERED-TF-NEXT: [[TMP62:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
-; CHECK-ORDERED-TF-NEXT: [[TMP62:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
-; CHECK-ORDERED-TF-NEXT: [[MULADD]] = tail call nnan float @llvm.fmuladd.f32(float [[TMP61]], float [[TMP62]], float [[SUM_07]])
+; CHECK-ORDERED-TF-NEXT: [[TMP63:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; CHECK-ORDERED-TF-NEXT: [[MULADD]] = tail call nnan float @llvm.fmuladd.f32(float [[TMP62]], float [[TMP63]], float [[SUM_07]])
 ; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
 ; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
 ; CHECK-ORDERED-TF: for.end:
-; CHECK-ORDERED-TF-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP49]], [[MIDDLE_BLOCK]] ]
+; CHECK-ORDERED-TF-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP50]], [[MIDDLE_BLOCK]] ]
 ; CHECK-ORDERED-TF-NEXT: ret float [[MULADD_LCSSA]]
 ;

From e6ae19998f97bb5f1d5a799a78bb88370ccfe788 Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Thu, 3 Apr 2025 12:53:07 +0100
Subject: [PATCH 7/7] !fixup address latest comments, thanks!

---
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp   |  4 ++--
 llvm/lib/Transforms/Vectorize/VPlan.h             | 15 +++++++++++----
 llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp |  4 ++--
 3 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 8ee08c94cc2fc..14383eeea7572 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2486,7 +2486,7 @@ void InnerLoopVectorizer::introduceCheckBlockInVPlan(BasicBlock *CheckIRBB) {
     PreVectorPH->swapSuccessors();
 
   // We just connected a new block to the scalar preheader. Update all
-  // ResumePhis by adding an incoming value for it, replacing the last value.
+  // ResumePhis by adding an incoming value for it, replicating the last value.
   for (VPRecipeBase &R : *cast<VPBasicBlock>(ScalarPH)) {
     auto *ResumePhi = dyn_cast<VPInstruction>(&R);
     if (!ResumePhi || ResumePhi->getOpcode() != VPInstruction::ResumePhi)
@@ -2661,7 +2661,7 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
   LoopScalarPreHeader =
       SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT,
                  LI, nullptr, Twine(Prefix) + "scalar.ph");
-  // NOTE: The Plan's scalar preheader isn't replaced with a VPIRBasicBlock
+  // NOTE: The Plan's scalar preheader VPBB isn't replaced with a VPIRBasicBlock
   // wrapping LoopScalarPreHeader here at the moment, because the Plan's scalar
   // preheader may be unreachable at this point. Instead it is replaced in
   // createVectorizedLoopSkeleton.
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index b528151c8ef25..18eac5bb79994 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3529,18 +3529,25 @@ class VPlan {
   /// Returns the 'middle' block of the plan, that is the block that selects
   /// whether to execute the scalar tail loop or the exit block from the loop
-  /// latch. If the scalar tail loop or exit block are known to always execute,
-  /// the middle block may branch directly to the block.
+  /// latch. If there is an early exit from the vector loop, the middle block
+  /// conceptually has the early exit block as a third successor, split across
+  /// 2 VPBBs. In that case, the second VPBB selects whether to execute the
+  /// scalar tail loop or the exit block. If the scalar tail loop or exit block
+  /// are known to always execute, the middle block may branch directly to that
+  /// block. If there is no loop region, the middle block cannot be identified
+  /// and nullptr is returned.
   VPBasicBlock *getMiddleBlock() {
     VPRegionBlock *LoopRegion = getVectorLoopRegion();
     if (!LoopRegion)
       return nullptr;
-    auto *RegionSucc = LoopRegion->getSingleSuccessor();
+    auto *RegionSucc = cast<VPBasicBlock>(LoopRegion->getSingleSuccessor());
     if (RegionSucc->getSingleSuccessor() ||
        is_contained(RegionSucc->getSuccessors(), getScalarPreheader()))
-      return cast<VPBasicBlock>(RegionSucc);
+      return RegionSucc;
+    // There is an early exit. The successor of RegionSucc is the middle block.
     return cast<VPBasicBlock>(RegionSucc->getSuccessors()[1]);
   }
+
   const VPBasicBlock *getMiddleBlock() const {
     return const_cast<VPlan *>(this)->getMiddleBlock();
   }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index b5cf0262b7d3e..098e35fbe5bbb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1684,7 +1684,7 @@ void VPlanTransforms::truncateToMinimalBitwidths(
 
 /// Remove BranchOnCond recipes with true conditions together with removing
 /// dead edges to their successors.
-static void simplifyBranchOnCondTrue(VPlan &Plan) {
+static void removeBranchOnCondTrue(VPlan &Plan) {
   using namespace llvm::VPlanPatternMatch;
   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
            vp_depth_first_shallow(Plan.getEntry()))) {
@@ -1737,7 +1737,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
   runPass(legalizeAndOptimizeInductions, Plan);
   runPass(removeRedundantExpandSCEVRecipes, Plan);
   runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType());
-  runPass(simplifyBranchOnCondTrue, Plan);
+  runPass(removeBranchOnCondTrue, Plan);
   runPass(removeDeadRecipes, Plan);
   runPass(createAndOptimizeReplicateRegions, Plan);
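As a usage note on the getMiddleBlock() contract documented in the hunk above: callers now have to tolerate both a null result (no vector loop region) and the early-exit shape. Below is a minimal illustrative sketch, not part of the patch itself; it assumes the VPlan API as shown in the hunks above, and the helper name dumpMiddleBlockSuccessors is hypothetical:

    // Illustrative sketch only -- not part of this patch. Assumes the
    // declarations from llvm/lib/Transforms/Vectorize/VPlan.h and
    // llvm/Support/Debug.h are available.
    static void dumpMiddleBlockSuccessors(VPlan &Plan) {
      // Per the documented contract, the middle block cannot be identified
      // when there is no vector loop region, so a null check is required.
      VPBasicBlock *MiddleVPBB = Plan.getMiddleBlock();
      if (!MiddleVPBB)
        return;
      // Whether or not the plan has an early exit, the block returned here is
      // the one selecting between the scalar tail loop and the exit block.
      for (VPBlockBase *Succ : MiddleVPBB->getSuccessors())
        dbgs() << "middle block successor: " << Succ->getName() << "\n";
    }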