diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index a1d7515f031cf..5786c89a564e3 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -840,8 +840,123 @@ class InstructionsState { static InstructionsState invalid() { return {nullptr, nullptr}; } }; +struct InterchangeableInstruction { + unsigned Opcode; + SmallVector Ops; + template + InterchangeableInstruction(unsigned Opcode, ArgTypes &&...Args) + : Opcode(Opcode), Ops{std::forward(Args)...} {} +}; + +bool operator<(const InterchangeableInstruction &LHS, + const InterchangeableInstruction &RHS) { + return LHS.Opcode < RHS.Opcode; +} + } // end anonymous namespace +/// \returns a sorted list of interchangeable instructions by instruction opcode +/// that \p I can be converted to. +/// e.g., +/// x << y -> x * (2^y) +/// x << 1 -> x * 2 +/// x << 0 -> x * 1 -> x - 0 -> x + 0 -> x & 11...1 -> x | 0 +/// x * 0 -> x & 0 +/// x * -1 -> 0 - x +/// TODO: support more patterns +static SmallVector +getInterchangeableInstruction(Instruction *I) { + // PII = Possible Interchangeable Instruction + SmallVector PII; + unsigned Opcode = I->getOpcode(); + PII.emplace_back(Opcode, I->operands()); + if (!is_contained({Instruction::Shl, Instruction::Mul, Instruction::Sub, + Instruction::Add}, + Opcode)) + return PII; + Constant *C; + if (match(I, m_BinOp(m_Value(), m_Constant(C)))) { + ConstantInt *V = nullptr; + if (auto *CI = dyn_cast(C)) { + V = CI; + } else if (auto *CDV = dyn_cast(C)) { + if (auto *CI = dyn_cast_if_present(CDV->getSplatValue())) + V = CI; + } + if (!V) + return PII; + Value *Op0 = I->getOperand(0); + Type *Op1Ty = I->getOperand(1)->getType(); + const APInt &Op1Int = V->getValue(); + Constant *Zero = + ConstantInt::get(Op1Ty, APInt::getZero(Op1Int.getBitWidth())); + Constant *UnsignedMax = + ConstantInt::get(Op1Ty, APInt::getMaxValue(Op1Int.getBitWidth())); + switch (Opcode) { + case 
Instruction::Shl: { + PII.emplace_back(Instruction::Mul, Op0, + ConstantInt::get(Op1Ty, 1 << Op1Int.getZExtValue())); + if (Op1Int.isZero()) { + PII.emplace_back(Instruction::Sub, Op0, Zero); + PII.emplace_back(Instruction::Add, Op0, Zero); + PII.emplace_back(Instruction::And, Op0, UnsignedMax); + PII.emplace_back(Instruction::Or, Op0, Zero); + } + break; + } + case Instruction::Mul: { + if (Op1Int.isOne()) { + PII.emplace_back(Instruction::Sub, Op0, Zero); + PII.emplace_back(Instruction::Add, Op0, Zero); + PII.emplace_back(Instruction::And, Op0, UnsignedMax); + PII.emplace_back(Instruction::Or, Op0, Zero); + } else if (Op1Int.isZero()) { + PII.emplace_back(Instruction::And, Op0, Zero); + } else if (Op1Int.isAllOnes()) { + PII.emplace_back(Instruction::Sub, Zero, Op0); + } + break; + } + case Instruction::Sub: + if (Op1Int.isZero()) { + PII.emplace_back(Instruction::Add, Op0, Zero); + PII.emplace_back(Instruction::And, Op0, UnsignedMax); + PII.emplace_back(Instruction::Or, Op0, Zero); + } + break; + case Instruction::Add: + if (Op1Int.isZero()) { + PII.emplace_back(Instruction::And, Op0, UnsignedMax); + PII.emplace_back(Instruction::Or, Op0, Zero); + } + break; + } + } + // std::set_intersection requires a sorted range. + sort(PII); + return PII; +} + +/// \returns the Op and operands which \p I convert to. 
+static std::pair> +getInterchangeableInstruction(Instruction *I, Instruction *MainOp, + Instruction *AltOp) { + SmallVector IIList = + getInterchangeableInstruction(I); + const auto *Iter = find_if(IIList, [&](const InterchangeableInstruction &II) { + return II.Opcode == MainOp->getOpcode(); + }); + if (Iter == IIList.end()) { + Iter = find_if(IIList, [&](const InterchangeableInstruction &II) { + return II.Opcode == AltOp->getOpcode(); + }); + assert(Iter != IIList.end() && + "Cannot find an interchangeable instruction."); + return std::make_pair(AltOp, Iter->Ops); + } + return std::make_pair(MainOp, Iter->Ops); +} + /// \returns true if \p Opcode is allowed as part of the main/alternate /// instruction for SLP vectorization. /// @@ -955,6 +1070,22 @@ static InstructionsState getSameOpcode(ArrayRef VL, return InstructionsState::invalid(); } bool AnyPoison = InstCnt != VL.size(); + // Currently, this is only used for binary ops. + // TODO: support all instructions + SmallVector InterchangeableOpcode = + getInterchangeableInstruction(cast(V)); + SmallVector AlternateInterchangeableOpcode; + auto UpdateInterchangeableOpcode = + [](SmallVector &LHS, + ArrayRef RHS) { + SmallVector NewInterchangeableOpcode; + std::set_intersection(LHS.begin(), LHS.end(), RHS.begin(), RHS.end(), + std::back_inserter(NewInterchangeableOpcode)); + if (NewInterchangeableOpcode.empty()) + return false; + LHS.swap(NewInterchangeableOpcode); + return true; + }; for (int Cnt = 0, E = VL.size(); Cnt < E; Cnt++) { auto *I = dyn_cast(VL[Cnt]); if (!I) @@ -967,14 +1098,32 @@ static InstructionsState getSameOpcode(ArrayRef VL, return InstructionsState::invalid(); unsigned InstOpcode = I->getOpcode(); if (IsBinOp && isa(I)) { - if (InstOpcode == Opcode || InstOpcode == AltOpcode) + SmallVector ThisInterchangeableOpcode( + getInterchangeableInstruction(I)); + if (UpdateInterchangeableOpcode(InterchangeableOpcode, + ThisInterchangeableOpcode)) continue; - if (Opcode == AltOpcode && 
isValidForAlternation(InstOpcode) && - isValidForAlternation(Opcode)) { - AltOpcode = InstOpcode; - AltIndex = Cnt; + if (AlternateInterchangeableOpcode.empty()) { + InterchangeableOpcode.erase( + remove_if(InterchangeableOpcode, + [](const InterchangeableInstruction &I) { + return !isValidForAlternation(I.Opcode); + }), + InterchangeableOpcode.end()); + ThisInterchangeableOpcode.erase( + remove_if(ThisInterchangeableOpcode, + [](const InterchangeableInstruction &I) { + return !isValidForAlternation(I.Opcode); + }), + ThisInterchangeableOpcode.end()); + if (InterchangeableOpcode.empty() || ThisInterchangeableOpcode.empty()) + return InstructionsState::invalid(); + AlternateInterchangeableOpcode.swap(ThisInterchangeableOpcode); continue; } + if (UpdateInterchangeableOpcode(AlternateInterchangeableOpcode, + ThisInterchangeableOpcode)) + continue; } else if (IsCastOp && isa(I)) { Value *Op0 = IBase->getOperand(0); Type *Ty0 = Op0->getType(); @@ -1075,6 +1224,24 @@ static InstructionsState getSameOpcode(ArrayRef VL, return InstructionsState::invalid(); } + if (IsBinOp) { + auto FindOp = [&](ArrayRef CandidateOp) { + for (Value *V : VL) { + if (isa(V)) + continue; + for (const InterchangeableInstruction &I : CandidateOp) + if (cast(V)->getOpcode() == I.Opcode) + return cast(V); + } + llvm_unreachable( + "Cannot find the candidate instruction for InstructionsState."); + }; + Instruction *MainOp = FindOp(InterchangeableOpcode); + Instruction *AltOp = AlternateInterchangeableOpcode.empty() + ? MainOp + : FindOp(AlternateInterchangeableOpcode); + return InstructionsState(MainOp, AltOp); + } return InstructionsState(cast(V), cast(VL[AltIndex])); } @@ -2405,42 +2572,46 @@ class BoUpSLP { } /// Go through the instructions in VL and append their operands. 
- void appendOperandsOfVL(ArrayRef VL, Instruction *VL0) { + void appendOperandsOfVL(ArrayRef VL, Instruction *MainOp, + Instruction *AltOp) { assert(!VL.empty() && "Bad VL"); assert((empty() || VL.size() == getNumLanes()) && "Expected same number of lanes"); // IntrinsicInst::isCommutative returns true if swapping the first "two" // arguments to the intrinsic produces the same result. constexpr unsigned IntrinsicNumOperands = 2; - unsigned NumOperands = VL0->getNumOperands(); - ArgSize = isa(VL0) ? IntrinsicNumOperands : NumOperands; + unsigned NumOperands = MainOp->getNumOperands(); + ArgSize = isa(MainOp) ? IntrinsicNumOperands : NumOperands; OpsVec.resize(NumOperands); unsigned NumLanes = VL.size(); - for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) { + for (unsigned OpIdx : seq(NumOperands)) OpsVec[OpIdx].resize(NumLanes); - for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { - assert((isa(VL[Lane]) || isa(VL[Lane])) && - "Expected instruction or poison value"); - // Our tree has just 3 nodes: the root and two operands. - // It is therefore trivial to get the APO. We only need to check the - // opcode of VL[Lane] and whether the operand at OpIdx is the LHS or - // RHS operand. The LHS operand of both add and sub is never attached - // to an inversese operation in the linearized form, therefore its APO - // is false. The RHS is true only if VL[Lane] is an inverse operation. - - // Since operand reordering is performed on groups of commutative - // operations or alternating sequences (e.g., +, -), we can safely - // tell the inverse operations by checking commutativity. 
- if (isa(VL[Lane])) { + for (auto [Lane, V] : enumerate(VL)) { + assert((isa(V) || isa(V)) && + "Expected instruction or poison value"); + if (isa(V)) { + for (unsigned OpIdx : seq(NumOperands)) OpsVec[OpIdx][Lane] = { - PoisonValue::get(VL0->getOperand(OpIdx)->getType()), true, + PoisonValue::get(MainOp->getOperand(OpIdx)->getType()), true, false}; - continue; - } - bool IsInverseOperation = !isCommutative(cast(VL[Lane])); + continue; + } + auto [SelectedOp, Ops] = + getInterchangeableInstruction(cast(V), MainOp, AltOp); + // Our tree has just 3 nodes: the root and two operands. + // It is therefore trivial to get the APO. We only need to check the + // opcode of V and whether the operand at OpIdx is the LHS or RHS + // operand. The LHS operand of both add and sub is never attached to an + // inverse operation in the linearized form, therefore its APO is + // false. The RHS is true only if V is an inverse operation. + + // Since operand reordering is performed on groups of commutative + // operations or alternating sequences (e.g., +, -), we can safely + // tell the inverse operations by checking commutativity. + bool IsInverseOperation = !isCommutative(cast(SelectedOp)); + for (unsigned OpIdx : seq(NumOperands)) { bool APO = (OpIdx == 0) ? false : IsInverseOperation; - OpsVec[OpIdx][Lane] = {cast(VL[Lane])->getOperand(OpIdx), - APO, false}; + OpsVec[OpIdx][Lane] = {Ops[OpIdx], APO, false}; } } } @@ -2547,11 +2718,12 @@ class BoUpSLP { public: /// Initialize with all the operands of the instruction vector \p RootVL. - VLOperands(ArrayRef RootVL, Instruction *VL0, const BoUpSLP &R) + VLOperands(ArrayRef RootVL, Instruction *MainOp, + Instruction *AltOp, const BoUpSLP &R) : TLI(*R.TLI), DL(*R.DL), SE(*R.SE), R(R), - L(R.LI->getLoopFor((VL0->getParent()))) { + L(R.LI->getLoopFor(MainOp->getParent())) { // Append all the operands of RootVL. 
- appendOperandsOfVL(RootVL, VL0); + appendOperandsOfVL(RootVL, MainOp, AltOp); } /// \Returns a value vector with the operands across all lanes for the @@ -3343,7 +3515,7 @@ class BoUpSLP { /// Set this bundle's operand from Scalars. void setOperand(const BoUpSLP &R, bool RequireReorder = false) { - VLOperands Ops(Scalars, MainOp, R); + VLOperands Ops(Scalars, MainOp, AltOp, R); if (RequireReorder) Ops.reorder(); for (unsigned I : seq(MainOp->getNumOperands())) @@ -8559,7 +8731,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n"); ValueList Left, Right; - VLOperands Ops(VL, VL0, *this); + VLOperands Ops(VL, VL0, S.getAltOp(), *this); if (cast(VL0)->isCommutative()) { // Commutative predicate - collect + sort operands of the instructions // so that each side is more likely to have the same opcode. @@ -15617,7 +15789,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { Value *V = Builder.CreateBinOp( static_cast(E->getOpcode()), LHS, RHS); - propagateIRFlags(V, E->Scalars, VL0, It == MinBWs.end()); + propagateIRFlags(V, E->Scalars, nullptr, It == MinBWs.end()); if (auto *I = dyn_cast(V)) { V = ::propagateMetadata(I, E->Scalars); // Drop nuw flags for abs(sub(commutative), true). 
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll index feb4ad865f314..c65df26fa0d5c 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll @@ -314,10 +314,10 @@ define void @store_try_reorder(ptr %dst) { ; ; POW2-ONLY-LABEL: @store_try_reorder( ; POW2-ONLY-NEXT: entry: -; POW2-ONLY-NEXT: [[ADD:%.*]] = add i32 0, 0 -; POW2-ONLY-NEXT: store i32 [[ADD]], ptr [[DST:%.*]], align 4 -; POW2-ONLY-NEXT: [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1 -; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4 +; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[DST:%.*]], align 4 +; POW2-ONLY-NEXT: [[ADD216:%.*]] = sub i32 0, 0 +; POW2-ONLY-NEXT: [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[DST]], i64 2 +; POW2-ONLY-NEXT: store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4 ; POW2-ONLY-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll index fd3d4ab80b29c..74d7f1c91f3bf 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll @@ -7,19 +7,18 @@ define void @test(ptr %a, i64 %0) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[A]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> , i64 [[TMP0]], i32 0 ; CHECK-NEXT: br label %[[BB:.*]] ; CHECK: [[BB]]: -; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i64 [[TMP0]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement 
<2 x i64> [[TMP4]], i64 0, i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = or disjoint <2 x i64> [[TMP3]], ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr double, <2 x ptr> [[TMP2]], <2 x i64> [[TMP5]] -; CHECK-NEXT: [[ARRAYIDX17_I28_1:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[TMP6]], i32 8, <2 x i1> splat (i1 true), <2 x double> poison) -; CHECK-NEXT: [[TMP8:%.*]] = load <2 x double>, ptr [[A]], align 8 -; CHECK-NEXT: [[TMP9:%.*]] = load <2 x double>, ptr [[A]], align 8 -; CHECK-NEXT: [[TMP10:%.*]] = fsub <2 x double> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x ptr> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[TMP6]], i32 8, <2 x i1> splat (i1 true), <2 x double> poison) +; CHECK-NEXT: [[TMP7:%.*]] = load <2 x double>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <2 x double>, ptr [[A]], align 8 ; CHECK-NEXT: [[TMP11:%.*]] = fsub <2 x double> [[TMP7]], [[TMP10]] -; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v2f64.p0.i64(<2 x double> [[TMP11]], ptr align 8 [[ARRAYIDX17_I28_1]], i64 -8, <2 x i1> splat (i1 true), i32 2) +; CHECK-NEXT: [[TMP12:%.*]] = fsub <2 x double> [[TMP9]], [[TMP11]] +; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v2f64.p0.i64(<2 x double> [[TMP12]], ptr align 8 [[TMP8]], i64 -8, <2 x i1> splat (i1 true), i32 2) ; CHECK-NEXT: br label %[[BB]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll index 7ab5e4d6cb787..89b87a3a45d12 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll @@ -324,10 +324,10 @@ define void @store_try_reorder(ptr %dst) { ; ; POW2-ONLY-LABEL: @store_try_reorder( ; POW2-ONLY-NEXT: entry: -; POW2-ONLY-NEXT: [[ADD:%.*]] = add i32 0, 0 -; POW2-ONLY-NEXT: store i32 
[[ADD]], ptr [[DST:%.*]], align 4 -; POW2-ONLY-NEXT: [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1 -; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4 +; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[DST:%.*]], align 4 +; POW2-ONLY-NEXT: [[ADD216:%.*]] = sub i32 0, 0 +; POW2-ONLY-NEXT: [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[DST]], i64 2 +; POW2-ONLY-NEXT: store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4 ; POW2-ONLY-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll b/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll index f46a5d84a86cc..e3b4898e85212 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll @@ -10,10 +10,8 @@ define i32 @foo(ptr nocapture %A, i32 %n) { ; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (...) @bar() ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], -; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], splat (i32 9) +; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i32> [[SHUFFLE]], +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP2]], splat (i32 9) ; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[A:%.*]], align 4 ; CHECK-NEXT: ret i32 undef ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll index 889f5a95c81d6..7af0c64f18748 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll @@ -4,22 +4,17 @@ define void @test(ptr %0, ptr %1, ptr 
%2) { ; CHECK-LABEL: @test( ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 4 -; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr [[TMP1:%.*]], align 4 -; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 -; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = sub <4 x i32> , [[TMP8]] -; CHECK-NEXT: [[TMP12:%.*]] = sub <4 x i32> [[TMP11]], [[TMP10]] -; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP12]], [[TMP6]] -; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i32> [[TMP13]], -; CHECK-NEXT: [[TMP15:%.*]] = sub <4 x i32> [[TMP13]], -; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], <4 x i32> -; CHECK-NEXT: [[TMP17:%.*]] = add <4 x i32> [[TMP16]], zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = sub <4 x i32> [[TMP16]], zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> [[TMP18]], <4 x i32> -; CHECK-NEXT: [[TMP20:%.*]] = add <4 x i32> [[TMP19]], zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = sub <4 x i32> [[TMP19]], zeroinitializer -; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i32> [[TMP20]], <4 x i32> [[TMP21]], <4 x i32> -; CHECK-NEXT: store <4 x i32> [[TMP22]], ptr [[TMP2:%.*]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[TMP1:%.*]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = sub <4 x i32> , [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = sub <4 x i32> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i32> [[TMP9]], [[TMP5]] +; CHECK-NEXT: [[TMP11:%.*]] = add <4 x i32> , [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = add <4 x i32> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: store <4 x i32> [[TMP14]], ptr 
[[TMP2:%.*]], align 4 ; CHECK-NEXT: ret void ; %4 = load i32, ptr %1, align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll index c976525b6720e..d474a5f2cecae 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll @@ -4,21 +4,17 @@ define i64 @foo(i32 %tmp7) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> , i32 [[TMP5:%.*]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[TMP24:%.*]] = sub i32 undef, 0 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> , i32 [[TMP24]], i32 4 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> [[TMP0]], i32 0, i32 5 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x i32> , i32 [[TMP24]], i32 6 -; CHECK-NEXT: [[TMP12:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> [[TMP3]], i64 0) -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP12]], <8 x i32> [[TMP11]], <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i32> [[TMP1]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <8 x i32> [[TMP1]], [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> [[TMP6]], <8 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = add <8 x i32> zeroinitializer, [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i32> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP9]]) -; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP10]], 0 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> , i32 [[TMP7:%.*]], i32 3 +; CHECK-NEXT: [[TMP1:%.*]] = sub <8 x i32> [[TMP0]], +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> , <8 x i32> [[TMP1]], <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 0, i32 5 +; CHECK-NEXT: [[TMP4:%.*]] = add nsw 
<8 x i32> [[TMP3]], [[TMP1]] +; CHECK-NEXT: [[TMP5:%.*]] = sub nsw <8 x i32> [[TMP3]], [[TMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP5]], <8 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = add <8 x i32> zeroinitializer, [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i32> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP8]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP9]], 0 ; CHECK-NEXT: [[TMP64:%.*]] = zext i32 [[OP_RDX]] to i64 ; CHECK-NEXT: ret i64 [[TMP64]] ; @@ -29,7 +25,7 @@ bb: %tmp4 = xor i32 %tmp3, 0 %tmp6 = sub i32 0, 0 %tmp8 = sub i32 %tmp7, 0 - %tmp9 = sub nsw i32 0, undef + %tmp9 = sub nsw i32 0, poison %tmp10 = add nsw i32 0, %tmp6 %tmp11 = sub nsw i32 0, %tmp8 %tmp12 = add i32 0, %tmp10 @@ -44,10 +40,10 @@ bb: %tmp21 = add i32 %tmp20, %tmp17 %tmp22 = sub i32 0, 0 %tmp23 = add i32 0, 0 - %tmp24 = sub i32 undef, 0 - %tmp25 = add nsw i32 %tmp23, undef + %tmp24 = sub i32 poison, 0 + %tmp25 = add nsw i32 %tmp23, poison %tmp26 = add nsw i32 %tmp24, %tmp22 - %tmp27 = sub nsw i32 undef, %tmp24 + %tmp27 = sub nsw i32 poison, %tmp24 %tmp28 = add i32 0, %tmp25 %tmp29 = xor i32 %tmp28, 0 %tmp30 = add i32 0, %tmp26 @@ -58,7 +54,7 @@ bb: %tmp35 = add i32 %tmp34, %tmp29 %tmp36 = add i32 %tmp35, 0 %tmp37 = add i32 %tmp36, %tmp33 - %tmp38 = sub nsw i32 0, undef + %tmp38 = sub nsw i32 0, poison %tmp39 = add i32 0, %tmp38 %tmp40 = xor i32 %tmp39, 0 %tmp41 = add i32 0, %tmp37 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll index 02c3173adc654..fc62b0b38fd53 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll @@ -9,12 +9,10 @@ define i32 @foo(ptr nocapture %A, i32 %n, i32 %m) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x 
i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], -; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], splat (i32 9) -; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[A:%.*]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i32> [[SHUFFLE]], +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], splat (i32 9) +; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[A:%.*]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0 ; CHECK-NEXT: [[EXTERNALUSE1:%.*]] = add nsw i32 [[TMP6]], [[M:%.*]] ; CHECK-NEXT: [[EXTERNALUSE2:%.*]] = mul nsw i32 [[TMP6]], [[M]] ; CHECK-NEXT: [[ADD10:%.*]] = add nsw i32 [[EXTERNALUSE1]], [[EXTERNALUSE2]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll index 2a5bfa7390770..daab4b6ea4c95 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll @@ -8,10 +8,8 @@ define i32 @test() { ; CHECK-NEXT: [[TMP10:%.*]] = or i8 [[A_PROMOTED]], 0 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i8> poison, i8 [[A_PROMOTED]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i8> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i8> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> [[TMP3]], <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP3]] to <4 x i16> ; CHECK-NEXT: [[TMP6:%.*]] = add <4 x 
i16> [[TMP5]], ; CHECK-NEXT: [[TMP7:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP6]]) ; CHECK-NEXT: [[TMP8:%.*]] = zext i16 [[TMP7]] to i32 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll index e6a166c27ac49..94f2c79faa8c9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll @@ -9,9 +9,7 @@ define i32 @foo() { ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> , i32 [[D]], i32 1 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i32> zeroinitializer, [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> zeroinitializer, [[TMP1]] -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP3]], <8 x i32> -; CHECK-NEXT: store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 15), align 4 +; CHECK-NEXT: store <8 x i32> [[TMP2]], ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 15), align 4 ; CHECK-NEXT: ret i32 0 ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll index 6e2a43ac5f9f1..15dd6756cd7db 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll @@ -242,13 +242,18 @@ exit: } define void @store_try_reorder(ptr %dst) { -; CHECK-LABEL: @store_try_reorder( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[ADD:%.*]] = add i32 0, 0 -; CHECK-NEXT: store i32 [[ADD]], ptr [[DST:%.*]], align 4 -; CHECK-NEXT: [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1 -; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4 -; CHECK-NEXT: ret void +; NON-POW2-LABEL: @store_try_reorder( +; NON-POW2-NEXT: entry: +; NON-POW2-NEXT: store <3 x i32> 
zeroinitializer, ptr [[DST:%.*]], align 4 +; NON-POW2-NEXT: ret void +; +; POW2-ONLY-LABEL: @store_try_reorder( +; POW2-ONLY-NEXT: entry: +; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[DST:%.*]], align 4 +; POW2-ONLY-NEXT: [[ADD216:%.*]] = sub i32 0, 0 +; POW2-ONLY-NEXT: [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[DST]], i64 2 +; POW2-ONLY-NEXT: store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4 +; POW2-ONLY-NEXT: ret void ; entry: %add = add i32 0, 0 diff --git a/llvm/test/Transforms/SLPVectorizer/alternate-opcode-sindle-bv.ll b/llvm/test/Transforms/SLPVectorizer/alternate-opcode-sindle-bv.ll index c250029519590..e4eff0f72b356 100644 --- a/llvm/test/Transforms/SLPVectorizer/alternate-opcode-sindle-bv.ll +++ b/llvm/test/Transforms/SLPVectorizer/alternate-opcode-sindle-bv.ll @@ -1,18 +1,29 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 -; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s %} -; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s %} +; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=X86 %} +; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=AARCH64 %} define <2 x i32> @test(i32 %arg) { -; CHECK-LABEL: define <2 x i32> @test( -; CHECK-SAME: i32 [[ARG:%.*]]) { -; CHECK-NEXT: bb: -; CHECK-NEXT: [[OR:%.*]] = or i32 [[ARG]], 0 -; CHECK-NEXT: [[MUL:%.*]] = mul i32 0, 1 -; CHECK-NEXT: [[MUL1:%.*]] = mul i32 [[OR]], [[MUL]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 0, [[MUL1]] -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[OR]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[MUL]], i32 1 -; CHECK-NEXT: ret <2 x i32> [[TMP1]] +; 
X86-LABEL: define <2 x i32> @test( +; X86-SAME: i32 [[ARG:%.*]]) { +; X86-NEXT: bb: +; X86-NEXT: [[OR:%.*]] = or i32 [[ARG]], 0 +; X86-NEXT: [[MUL:%.*]] = mul i32 0, 1 +; X86-NEXT: [[MUL1:%.*]] = mul i32 [[OR]], [[MUL]] +; X86-NEXT: [[CMP:%.*]] = icmp ugt i32 0, [[MUL1]] +; X86-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[OR]], i32 0 +; X86-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[MUL]], i32 1 +; X86-NEXT: ret <2 x i32> [[TMP1]] +; +; AARCH64-LABEL: define <2 x i32> @test( +; AARCH64-SAME: i32 [[ARG:%.*]]) { +; AARCH64-NEXT: bb: +; AARCH64-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> , i32 [[ARG]], i32 0 +; AARCH64-NEXT: [[TMP1:%.*]] = or <2 x i32> [[TMP0]], zeroinitializer +; AARCH64-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0 +; AARCH64-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 +; AARCH64-NEXT: [[MUL1:%.*]] = mul i32 [[TMP2]], [[TMP3]] +; AARCH64-NEXT: [[CMP:%.*]] = icmp ugt i32 0, [[MUL1]] +; AARCH64-NEXT: ret <2 x i32> [[TMP1]] ; bb: %or = or i32 %arg, 0 @@ -23,4 +34,3 @@ bb: %1 = insertelement <2 x i32> %0, i32 %mul, i32 1 ret <2 x i32> %1 } - diff --git a/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll b/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll index 61a84a67c9ff1..6f9768af38caf 100644 --- a/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll +++ b/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll @@ -5,15 +5,13 @@ define void @func(i32 %0) { ; CHECK-LABEL: define void @func( ; CHECK-SAME: i32 [[TMP0:%.*]]) { ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> , i32 [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i32> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shl i32 [[TMP0]], 0 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 
[[TMP6]], 0 -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <32 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <32 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP6]] to i64 ; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[TMP9]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <32 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <32 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <32 x i32> [[TMP11]], <32 x i32> , <32 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <32 x i32> [[TMP12]], i32 0, i32 0 ; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x i32> [[TMP13]], <8 x i32> zeroinitializer, i64 16) @@ -24,61 +22,61 @@ define void @func(i32 %0) { ; CHECK-NEXT: [[TMP19:%.*]] = sext <32 x i32> [[TMP18]] to <32 x i64> ; CHECK-NEXT: [[TMP20:%.*]] = icmp slt <32 x i64> [[TMP19]], zeroinitializer ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <32 x i1> [[TMP20]], i32 31 -; CHECK-NEXT: [[TMP22:%.*]] = and i1 false, [[TMP21]] +; CHECK-NEXT: [[TMP76:%.*]] = and i1 false, [[TMP21]] ; CHECK-NEXT: [[TMP23:%.*]] = extractelement <32 x i1> [[TMP20]], i32 30 -; CHECK-NEXT: [[TMP24:%.*]] = and i1 false, [[TMP23]] +; CHECK-NEXT: [[TMP22:%.*]] = and i1 false, [[TMP23]] ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <32 x i1> [[TMP20]], i32 29 -; CHECK-NEXT: [[TMP26:%.*]] = and i1 false, [[TMP25]] +; CHECK-NEXT: [[TMP24:%.*]] = and i1 false, [[TMP25]] ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <32 x i1> [[TMP20]], i32 28 -; CHECK-NEXT: [[TMP28:%.*]] = and i1 false, [[TMP27]] +; CHECK-NEXT: [[TMP26:%.*]] = and i1 false, [[TMP27]] ; CHECK-NEXT: [[TMP29:%.*]] = extractelement <32 x i1> [[TMP20]], i32 27 -; CHECK-NEXT: [[TMP30:%.*]] = and i1 false, [[TMP29]] +; CHECK-NEXT: [[TMP28:%.*]] = and i1 false, [[TMP29]] ; CHECK-NEXT: [[TMP31:%.*]] = extractelement <32 x i1> [[TMP20]], i32 26 -; CHECK-NEXT: 
[[TMP32:%.*]] = and i1 false, [[TMP31]] +; CHECK-NEXT: [[TMP30:%.*]] = and i1 false, [[TMP31]] ; CHECK-NEXT: [[TMP33:%.*]] = extractelement <32 x i1> [[TMP20]], i32 25 -; CHECK-NEXT: [[TMP34:%.*]] = and i1 false, [[TMP33]] +; CHECK-NEXT: [[TMP32:%.*]] = and i1 false, [[TMP33]] ; CHECK-NEXT: [[TMP35:%.*]] = extractelement <32 x i1> [[TMP20]], i32 24 -; CHECK-NEXT: [[TMP36:%.*]] = and i1 false, [[TMP35]] +; CHECK-NEXT: [[TMP34:%.*]] = and i1 false, [[TMP35]] ; CHECK-NEXT: [[TMP37:%.*]] = extractelement <32 x i1> [[TMP20]], i32 23 -; CHECK-NEXT: [[TMP38:%.*]] = and i1 false, [[TMP37]] +; CHECK-NEXT: [[TMP36:%.*]] = and i1 false, [[TMP37]] ; CHECK-NEXT: [[TMP39:%.*]] = extractelement <32 x i1> [[TMP20]], i32 22 -; CHECK-NEXT: [[TMP40:%.*]] = and i1 false, [[TMP39]] +; CHECK-NEXT: [[TMP38:%.*]] = and i1 false, [[TMP39]] ; CHECK-NEXT: [[TMP41:%.*]] = extractelement <32 x i1> [[TMP20]], i32 21 -; CHECK-NEXT: [[TMP42:%.*]] = and i1 false, [[TMP41]] +; CHECK-NEXT: [[TMP40:%.*]] = and i1 false, [[TMP41]] ; CHECK-NEXT: [[TMP43:%.*]] = extractelement <32 x i1> [[TMP20]], i32 20 -; CHECK-NEXT: [[TMP44:%.*]] = and i1 false, [[TMP43]] +; CHECK-NEXT: [[TMP42:%.*]] = and i1 false, [[TMP43]] ; CHECK-NEXT: [[TMP45:%.*]] = extractelement <32 x i1> [[TMP20]], i32 19 -; CHECK-NEXT: [[TMP46:%.*]] = and i1 false, [[TMP45]] +; CHECK-NEXT: [[TMP44:%.*]] = and i1 false, [[TMP45]] ; CHECK-NEXT: [[TMP47:%.*]] = extractelement <32 x i1> [[TMP20]], i32 18 -; CHECK-NEXT: [[TMP48:%.*]] = and i1 false, [[TMP47]] +; CHECK-NEXT: [[TMP46:%.*]] = and i1 false, [[TMP47]] ; CHECK-NEXT: [[TMP49:%.*]] = extractelement <32 x i1> [[TMP20]], i32 17 -; CHECK-NEXT: [[TMP50:%.*]] = and i1 false, [[TMP49]] +; CHECK-NEXT: [[TMP48:%.*]] = and i1 false, [[TMP49]] ; CHECK-NEXT: [[TMP51:%.*]] = extractelement <32 x i1> [[TMP20]], i32 16 -; CHECK-NEXT: [[TMP52:%.*]] = and i1 false, [[TMP51]] +; CHECK-NEXT: [[TMP50:%.*]] = and i1 false, [[TMP51]] ; CHECK-NEXT: [[TMP53:%.*]] = extractelement <32 x i1> [[TMP20]], i32 15 
-; CHECK-NEXT: [[TMP54:%.*]] = and i1 false, [[TMP53]] +; CHECK-NEXT: [[TMP52:%.*]] = and i1 false, [[TMP53]] ; CHECK-NEXT: [[TMP55:%.*]] = extractelement <32 x i1> [[TMP20]], i32 14 -; CHECK-NEXT: [[TMP56:%.*]] = and i1 false, [[TMP55]] +; CHECK-NEXT: [[TMP54:%.*]] = and i1 false, [[TMP55]] ; CHECK-NEXT: [[TMP57:%.*]] = extractelement <32 x i1> [[TMP20]], i32 13 -; CHECK-NEXT: [[TMP58:%.*]] = and i1 false, [[TMP57]] +; CHECK-NEXT: [[TMP56:%.*]] = and i1 false, [[TMP57]] ; CHECK-NEXT: [[TMP59:%.*]] = extractelement <32 x i1> [[TMP20]], i32 12 -; CHECK-NEXT: [[TMP60:%.*]] = and i1 false, [[TMP59]] +; CHECK-NEXT: [[TMP58:%.*]] = and i1 false, [[TMP59]] ; CHECK-NEXT: [[TMP61:%.*]] = extractelement <32 x i1> [[TMP20]], i32 11 -; CHECK-NEXT: [[TMP62:%.*]] = and i1 false, [[TMP61]] +; CHECK-NEXT: [[TMP60:%.*]] = and i1 false, [[TMP61]] ; CHECK-NEXT: [[TMP63:%.*]] = extractelement <32 x i1> [[TMP20]], i32 10 -; CHECK-NEXT: [[TMP64:%.*]] = and i1 false, [[TMP63]] +; CHECK-NEXT: [[TMP62:%.*]] = and i1 false, [[TMP63]] ; CHECK-NEXT: [[TMP65:%.*]] = extractelement <32 x i1> [[TMP20]], i32 9 -; CHECK-NEXT: [[TMP66:%.*]] = and i1 false, [[TMP65]] +; CHECK-NEXT: [[TMP64:%.*]] = and i1 false, [[TMP65]] ; CHECK-NEXT: [[TMP67:%.*]] = extractelement <32 x i1> [[TMP20]], i32 8 -; CHECK-NEXT: [[TMP68:%.*]] = and i1 false, [[TMP67]] +; CHECK-NEXT: [[TMP66:%.*]] = and i1 false, [[TMP67]] ; CHECK-NEXT: [[TMP69:%.*]] = extractelement <32 x i1> [[TMP20]], i32 7 -; CHECK-NEXT: [[TMP70:%.*]] = and i1 false, [[TMP69]] +; CHECK-NEXT: [[TMP68:%.*]] = and i1 false, [[TMP69]] ; CHECK-NEXT: [[TMP71:%.*]] = extractelement <32 x i1> [[TMP20]], i32 6 -; CHECK-NEXT: [[TMP72:%.*]] = and i1 false, [[TMP71]] +; CHECK-NEXT: [[TMP70:%.*]] = and i1 false, [[TMP71]] ; CHECK-NEXT: [[TMP73:%.*]] = extractelement <32 x i1> [[TMP20]], i32 5 -; CHECK-NEXT: [[TMP74:%.*]] = and i1 false, [[TMP73]] +; CHECK-NEXT: [[TMP72:%.*]] = and i1 false, [[TMP73]] ; CHECK-NEXT: [[TMP75:%.*]] = extractelement <32 x i1> 
[[TMP20]], i32 4 -; CHECK-NEXT: [[TMP76:%.*]] = and i1 false, [[TMP75]] +; CHECK-NEXT: [[TMP74:%.*]] = and i1 false, [[TMP75]] ; CHECK-NEXT: [[TMP77:%.*]] = extractelement <32 x i32> [[TMP18]], i32 0 ; CHECK-NEXT: [[TMP78:%.*]] = zext i32 [[TMP77]] to i64 ; CHECK-NEXT: [[TMP79:%.*]] = getelementptr float, ptr addrspace(1) null, i64 [[TMP78]] diff --git a/llvm/test/Transforms/SLPVectorizer/shuffle-mask-resized.ll b/llvm/test/Transforms/SLPVectorizer/shuffle-mask-resized.ll index 732b50396a460..1e3255f2187af 100644 --- a/llvm/test/Transforms/SLPVectorizer/shuffle-mask-resized.ll +++ b/llvm/test/Transforms/SLPVectorizer/shuffle-mask-resized.ll @@ -12,9 +12,7 @@ define i32 @test() { ; CHECK-NEXT: br i1 false, label [[BB4:%.*]], label [[BB3]] ; CHECK: bb3: ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> , <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> zeroinitializer, [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> zeroinitializer, [[TMP2]] -; CHECK-NEXT: [[TMP5]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> +; CHECK-NEXT: [[TMP5]] = or <2 x i32> zeroinitializer, [[TMP2]] ; CHECK-NEXT: br label [[BB1]] ; CHECK: bb4: ; CHECK-NEXT: [[TMP6:%.*]] = phi <8 x i32> [ [[TMP1]], [[BB1]] ]