diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index ed4541f66740e..c9f142d64ae9e 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -259,6 +259,33 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return AddrExtractCost + MemoryOpCost + PackingCost + ConditionalCost; } + /// Checks if the provided mask \p is a splat mask, i.e. it contains only -1 + /// or same non -1 index value and this index value contained at least twice. + /// So, mask <0, -1,-1, -1> is not considered splat (it is just identity), + /// same for <-1, 0, -1, -1> (just a slide), while <2, -1, 2, -1> is a splat + /// with \p Index=2. + static bool isSplatMask(ArrayRef Mask, unsigned NumSrcElts, int &Index) { + // Check that the broadcast index meets at least twice. + bool IsCompared = false; + if (int SplatIdx = PoisonMaskElem; + all_of(enumerate(Mask), [&](const auto &P) { + if (P.value() == PoisonMaskElem) + return P.index() != Mask.size() - 1 || IsCompared; + if (static_cast(P.value()) >= NumSrcElts * 2) + return false; + if (SplatIdx == PoisonMaskElem) { + SplatIdx = P.value(); + return P.index() != Mask.size() - 1; + } + IsCompared = true; + return SplatIdx == P.value(); + })) { + Index = SplatIdx; + return true; + } + return false; + } + protected: explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL) : BaseT(DL) {} @@ -1014,17 +1041,20 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return Kind; int NumSrcElts = Ty->getElementCount().getKnownMinValue(); switch (Kind) { - case TTI::SK_PermuteSingleSrc: + case TTI::SK_PermuteSingleSrc: { if (ShuffleVectorInst::isReverseMask(Mask, NumSrcElts)) return TTI::SK_Reverse; if (ShuffleVectorInst::isZeroEltSplatMask(Mask, NumSrcElts)) return TTI::SK_Broadcast; + if (isSplatMask(Mask, NumSrcElts, Index)) + return TTI::SK_Broadcast; if (ShuffleVectorInst::isExtractSubvectorMask(Mask, NumSrcElts, Index) && (Index + Mask.size()) <= (size_t)NumSrcElts) { SubTy = FixedVectorType::get(Ty->getElementType(), Mask.size()); return TTI::SK_ExtractSubvector; } break; + } case TTI::SK_PermuteTwoSrc: { int NumSubElts; if (Mask.size() > 2 && ShuffleVectorInst::isInsertSubvectorMask( diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index fd167b0036e9c..57f3016fbe1e0 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -13199,6 +13199,7 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry( // No perfect match, just shuffle, so choose the first tree node from the // tree. Entries.push_back(FirstEntries.front()); + VF = FirstEntries.front()->getVectorFactor(); } else { // Try to find nodes with the same vector factor. assert(UsedTEs.size() == 2 && "Expected at max 2 permuted entries."); @@ -13239,6 +13240,8 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry( Entries.push_back(SecondEntries.front()); VF = std::max(Entries.front()->getVectorFactor(), Entries.back()->getVectorFactor()); + } else { + VF = Entries.front()->getVectorFactor(); } } @@ -13350,17 +13353,141 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry( : Entries[Pair.first]->findLaneForValue(VL[Pair.second])); IsIdentity &= Mask[Idx] == Pair.second; } - switch (Entries.size()) { - case 1: - if (IsIdentity || EntryLanes.size() > 1 || VL.size() <= 2) - return TargetTransformInfo::SK_PermuteSingleSrc; - break; - case 2: - if (EntryLanes.size() > 2 || VL.size() <= 2) - return TargetTransformInfo::SK_PermuteTwoSrc; - break; - default: - break; + if (ForOrder || IsIdentity || Entries.empty()) { + switch (Entries.size()) { + case 1: + if (IsIdentity || EntryLanes.size() > 1 || VL.size() <= 2) + return TargetTransformInfo::SK_PermuteSingleSrc; + break; + case 2: + if (EntryLanes.size() > 2 || VL.size() <= 2) + return TargetTransformInfo::SK_PermuteTwoSrc; + break; + default: + break; + } + } else if (!isa(VL.front()->getType()) && + (EntryLanes.size() > Entries.size() || VL.size() <= 2)) { + // Do the cost estimation if shuffle beneficial than buildvector. + SmallVector SubMask(std::next(Mask.begin(), Part * VL.size()), + std::next(Mask.begin(), (Part + 1) * VL.size())); + int MinElement = SubMask.front(), MaxElement = SubMask.front(); + for (int Idx : SubMask) { + if (Idx == PoisonMaskElem) + continue; + if (MinElement == PoisonMaskElem || MinElement % VF > Idx % VF) + MinElement = Idx; + if (MaxElement == PoisonMaskElem || MaxElement % VF < Idx % VF) + MaxElement = Idx; + } + assert(MaxElement >= 0 && MinElement >= 0 && + MaxElement % VF >= MinElement % VF && + "Expected at least single element."); + unsigned NewVF = std::max( + VL.size(), getFullVectorNumberOfElements(*TTI, VL.front()->getType(), + (MaxElement % VF) - + (MinElement % VF) + 1)); + if (NewVF < VF) { + for_each(SubMask, [&](int &Idx) { + if (Idx == PoisonMaskElem) + return; + Idx = (Idx % VF) - (MinElement % VF) + + (Idx >= static_cast(VF) ? NewVF : 0); + }); + VF = NewVF; + } + + constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; + auto *VecTy = getWidenedType(VL.front()->getType(), VF); + auto *MaskVecTy = getWidenedType(VL.front()->getType(), SubMask.size()); + auto GetShuffleCost = [&, + &TTI = *TTI](ArrayRef Mask, + ArrayRef Entries, + VectorType *VecTy) -> InstructionCost { + if (Entries.size() == 1 && Entries.front()->getInterleaveFactor() > 0 && + ShuffleVectorInst::isDeInterleaveMaskOfFactor( + Mask, Entries.front()->getInterleaveFactor())) + return TTI::TCC_Free; + return ::getShuffleCost(TTI, + Entries.size() > 1 ? TTI::SK_PermuteTwoSrc + : TTI::SK_PermuteSingleSrc, + VecTy, Mask, CostKind); + }; + InstructionCost ShuffleCost = GetShuffleCost(SubMask, Entries, VecTy); + InstructionCost FirstShuffleCost = 0; + SmallVector FirstMask(SubMask.begin(), SubMask.end()); + if (Entries.size() == 1 || !Entries[0]->isGather()) { + FirstShuffleCost = ShuffleCost; + } else { + // Transform mask to include only first entry. + APInt DemandedElts = APInt::getAllOnes(SubMask.size()); + bool IsIdentity = true; + for (auto [I, Idx] : enumerate(FirstMask)) { + if (Idx >= static_cast(VF)) { + Idx = PoisonMaskElem; + } else { + DemandedElts.clearBit(I); + if (Idx != PoisonMaskElem) + IsIdentity &= static_cast(I) == Idx; + } + } + if (!IsIdentity) + FirstShuffleCost = GetShuffleCost(FirstMask, Entries.front(), VecTy); + FirstShuffleCost += TTI->getScalarizationOverhead( + MaskVecTy, DemandedElts, /*Insert=*/true, + /*Extract=*/false, CostKind); + } + InstructionCost SecondShuffleCost = 0; + SmallVector SecondMask(SubMask.begin(), SubMask.end()); + if (Entries.size() == 1 || !Entries[1]->isGather()) { + SecondShuffleCost = ShuffleCost; + } else { + // Transform mask to include only first entry. + APInt DemandedElts = APInt::getAllOnes(SubMask.size()); + bool IsIdentity = true; + for (auto [I, Idx] : enumerate(SecondMask)) { + if (Idx < static_cast(VF) && Idx >= 0) { + Idx = PoisonMaskElem; + } else { + DemandedElts.clearBit(I); + if (Idx != PoisonMaskElem) { + Idx -= VF; + IsIdentity &= static_cast(I) == Idx; + } + } + } + if (!IsIdentity) + SecondShuffleCost = GetShuffleCost(SecondMask, Entries[1], VecTy); + SecondShuffleCost += TTI->getScalarizationOverhead( + MaskVecTy, DemandedElts, /*Insert=*/true, + /*Extract=*/false, CostKind); + } + APInt DemandedElts = APInt::getAllOnes(SubMask.size()); + for (auto [I, Idx] : enumerate(SubMask)) + if (Idx == PoisonMaskElem) + DemandedElts.clearBit(I); + InstructionCost BuildVectorCost = + TTI->getScalarizationOverhead(MaskVecTy, DemandedElts, /*Insert=*/true, + /*Extract=*/false, CostKind); + const TreeEntry *BestEntry = nullptr; + if (FirstShuffleCost < ShuffleCost) { + copy(FirstMask, std::next(Mask.begin(), Part * VL.size())); + BestEntry = Entries.front(); + ShuffleCost = FirstShuffleCost; + } + if (SecondShuffleCost < ShuffleCost) { + copy(SecondMask, std::next(Mask.begin(), Part * VL.size())); + BestEntry = Entries[1]; + ShuffleCost = SecondShuffleCost; + } + if (BuildVectorCost >= ShuffleCost) { + if (BestEntry) { + Entries.clear(); + Entries.push_back(BestEntry); + } + return Entries.size() > 1 ? TargetTransformInfo::SK_PermuteTwoSrc + : TargetTransformInfo::SK_PermuteSingleSrc; + } } Entries.clear(); // Clear the corresponding mask elements. diff --git a/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll b/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll index a18156744a36b..7107d2be579c6 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll @@ -399,13 +399,13 @@ define amdgpu_kernel void @shufflevector_i8(<2 x i8> %vec1, <2 x i8> %vec2) { ; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf00 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> zeroinitializer ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf10 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf11 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf11 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf02 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf20 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf22 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf03 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf30 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf33 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf33 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf12 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf21 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf13 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> @@ -436,13 +436,13 @@ define amdgpu_kernel void @shufflevector_i8(<2 x i8> %vec1, <2 x i8> %vec2) { ; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf00_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> zeroinitializer ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf10_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf11_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf11_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf02_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf20_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf22_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf03_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf30_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf33_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf33_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf12_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf21_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf13_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> @@ -476,13 +476,13 @@ define amdgpu_kernel void @shufflevector_i8(<2 x i8> %vec1, <2 x i8> %vec2) { ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf00 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> zeroinitializer ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf10 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf11 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf11 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf02 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf20 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf22 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf03 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf30 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf33 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf33 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf12 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf21 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf13 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> @@ -513,13 +513,13 @@ define amdgpu_kernel void @shufflevector_i8(<2 x i8> %vec1, <2 x i8> %vec2) { ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf00_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> zeroinitializer ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf10_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf11_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf11_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf02_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf20_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf22_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf03_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf30_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf33_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf33_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf12_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf21_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf13_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-exact-vlen.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-exact-vlen.ll index 30bae7e926289..cada8ab240cc7 100644 --- a/llvm/test/Analysis/CostModel/RISCV/shuffle-exact-vlen.ll +++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-exact-vlen.ll @@ -734,7 +734,7 @@ define void @shuffle2() vscale_range(2,2) { define void @multipart() vscale_range(2,2) { ; RV32-LABEL: 'multipart' ; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v16a = shufflevector <8 x i16> poison, <8 x i16> poison, <8 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16b = shufflevector <8 x i16> poison, <8 x i16> poison, <8 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16b = shufflevector <8 x i16> poison, <8 x i16> poison, <8 x i32> ; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16c = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> ; RV32-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v16d = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> ; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32a = shufflevector <4 x i32> poison, <4 x i32> poison, <4 x i32> @@ -743,18 +743,18 @@ define void @multipart() vscale_range(2,2) { ; RV32-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %v32many = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; RV32-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %v32many2 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v323 = shufflevector <3 x i32> poison, <3 x i32> poison, <3 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64a = shufflevector <2 x i64> poison, <2 x i64> poison, <2 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64a = shufflevector <2 x i64> poison, <2 x i64> poison, <2 x i32> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64b = shufflevector <2 x i64> poison, <2 x i64> poison, <2 x i32> zeroinitializer ; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v64ab = shufflevector <4 x i64> poison, <4 x i64> poison, <4 x i32> ; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v64d = shufflevector <4 x i64> poison, <4 x i64> poison, <4 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f64a = shufflevector <2 x double> poison, <2 x double> poison, <2 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64a = shufflevector <2 x double> poison, <2 x double> poison, <2 x i32> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64b = shufflevector <2 x double> poison, <2 x double> poison, <2 x i32> zeroinitializer ; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %f64ab = shufflevector <4 x double> poison, <4 x double> poison, <4 x i32> ; RV32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; RV64-LABEL: 'multipart' ; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v16a = shufflevector <8 x i16> poison, <8 x i16> poison, <8 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16b = shufflevector <8 x i16> poison, <8 x i16> poison, <8 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16b = shufflevector <8 x i16> poison, <8 x i16> poison, <8 x i32> ; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16c = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> ; RV64-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v16d = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> ; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32a = shufflevector <4 x i32> poison, <4 x i32> poison, <4 x i32> @@ -763,18 +763,18 @@ define void @multipart() vscale_range(2,2) { ; RV64-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %v32many = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; RV64-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %v32many2 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v323 = shufflevector <3 x i32> poison, <3 x i32> poison, <3 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64a = shufflevector <2 x i64> poison, <2 x i64> poison, <2 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64a = shufflevector <2 x i64> poison, <2 x i64> poison, <2 x i32> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64b = shufflevector <2 x i64> poison, <2 x i64> poison, <2 x i32> zeroinitializer ; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v64ab = shufflevector <4 x i64> poison, <4 x i64> poison, <4 x i32> ; RV64-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v64d = shufflevector <4 x i64> poison, <4 x i64> poison, <4 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f64a = shufflevector <2 x double> poison, <2 x double> poison, <2 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64a = shufflevector <2 x double> poison, <2 x double> poison, <2 x i32> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64b = shufflevector <2 x double> poison, <2 x double> poison, <2 x i32> zeroinitializer ; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f64ab = shufflevector <4 x double> poison, <4 x double> poison, <4 x i32> ; RV64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SIZE-LABEL: 'multipart' ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v16a = shufflevector <8 x i16> poison, <8 x i16> poison, <8 x i32> -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16b = shufflevector <8 x i16> poison, <8 x i16> poison, <8 x i32> +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16b = shufflevector <8 x i16> poison, <8 x i16> poison, <8 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16c = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v16d = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32a = shufflevector <4 x i32> poison, <4 x i32> poison, <4 x i32> @@ -783,11 +783,11 @@ define void @multipart() vscale_range(2,2) { ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v32many = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v32many2 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v323 = shufflevector <3 x i32> poison, <3 x i32> poison, <3 x i32> -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64a = shufflevector <2 x i64> poison, <2 x i64> poison, <2 x i32> +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64a = shufflevector <2 x i64> poison, <2 x i64> poison, <2 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64b = shufflevector <2 x i64> poison, <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64ab = shufflevector <4 x i64> poison, <4 x i64> poison, <4 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64d = shufflevector <4 x i64> poison, <4 x i64> poison, <4 x i32> -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f64a = shufflevector <2 x double> poison, <2 x double> poison, <2 x i32> +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64a = shufflevector <2 x double> poison, <2 x double> poison, <2 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64b = shufflevector <2 x double> poison, <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f64ab = shufflevector <4 x double> poison, <4 x double> poison, <4 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-single-src-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-single-src-latency.ll index 3fa54588625f1..330cbc07bf33a 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-single-src-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-single-src-latency.ll @@ -47,7 +47,7 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> @@ -86,7 +86,7 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -127,7 +127,7 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> @@ -174,7 +174,7 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> @@ -336,7 +336,7 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'test_vXi8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> @@ -345,7 +345,7 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'test_vXi8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> @@ -354,7 +354,7 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI-LABEL: 'test_vXi8' -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-splat-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-splat-codesize.ll index 39c935fff6b76..0215f658cbe40 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-splat-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-splat-codesize.ll @@ -4,9 +4,9 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx | FileCheck %s -check-prefixes=AVX1 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s -check-prefixes=AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512 ; ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 @@ -19,20 +19,20 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512) { ; SSE-LABEL: 'test_vXf64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXf64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXf64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf64' @@ -50,20 +50,20 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) { ; SSE-LABEL: 'test_vXi64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi64' @@ -82,22 +82,22 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; SSE-LABEL: 'test_vXf32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXf32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXf32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf32' @@ -118,22 +118,22 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; SSE-LABEL: 'test_vXi32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi32' @@ -154,58 +154,42 @@ define void @test_vXf16(<2 x half> %src32, <4 x half> %src64, <8 x half> %src128 ; SSE2-LABEL: 'test_vXf16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXf16' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'test_vXf16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXf16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX512F-LABEL: 'test_vXf16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX512BW-LABEL: 'test_vXf16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX512VBMI-LABEL: 'test_vXf16' -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX512-LABEL: 'test_vXf16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> @@ -260,66 +244,50 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE2-LABEL: 'test_vXi16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi16' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX512F-LABEL: 'test_vXi16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX512BW-LABEL: 'test_vXi16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX512VBMI-LABEL: 'test_vXi16' -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX512-LABEL: 'test_vXi16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> @@ -332,11 +300,11 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) { ; SSE2-LABEL: 'test_vXi8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi8' @@ -344,8 +312,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi8' @@ -353,8 +321,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi8' @@ -362,8 +330,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi8' @@ -371,36 +339,18 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX512F-LABEL: 'test_vXi8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX512BW-LABEL: 'test_vXi8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX512VBMI-LABEL: 'test_vXi8' -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX512-LABEL: 'test_vXi8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> @@ -415,10 +365,10 @@ define void @test_vXi1(<2 x i1> %src2, <4 x i1> %src4, <8 x i1> %src8, <16 x i1> ; SSE2-LABEL: 'test_vXi1' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi1' @@ -426,8 +376,8 @@ define void @test_vXi1(<2 x i1> %src2, <4 x i1> %src4, <8 x i1> %src8, <16 x i1> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi1' @@ -435,8 +385,8 @@ define void @test_vXi1(<2 x i1> %src2, <4 x i1> %src4, <8 x i1> %src8, <16 x i1> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi1' @@ -444,8 +394,8 @@ define void @test_vXi1(<2 x i1> %src2, <4 x i1> %src4, <8 x i1> %src8, <16 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi1' @@ -453,17 +403,17 @@ define void @test_vXi1(<2 x i1> %src2, <4 x i1> %src4, <8 x i1> %src8, <16 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi1' -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> @@ -483,22 +433,22 @@ define void @test_upper_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a ; SSE-LABEL: 'test_upper_vXf32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_upper_vXf32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_upper_vXf32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_upper_vXf32' diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-splat-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-splat-latency.ll index 2a89924dc7780..b20986ed6657d 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-splat-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-splat-latency.ll @@ -4,9 +4,9 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s -check-prefixes=AVX1 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512 ; ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 @@ -19,26 +19,26 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512) { ; SSE-LABEL: 'test_vXf64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXf64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXf64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> @@ -50,26 +50,26 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) { ; SSE-LABEL: 'test_vXi64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> @@ -82,29 +82,29 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; SSE-LABEL: 'test_vXf32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXf32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXf32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> @@ -118,29 +118,29 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; SSE-LABEL: 'test_vXi32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> @@ -154,58 +154,42 @@ define void @test_vXf16(<2 x half> %src32, <4 x half> %src64, <8 x half> %src128 ; SSE2-LABEL: 'test_vXf16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXf16' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'test_vXf16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXf16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX512F-LABEL: 'test_vXf16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX512BW-LABEL: 'test_vXf16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX512VBMI-LABEL: 'test_vXf16' -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX512-LABEL: 'test_vXf16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> @@ -260,66 +244,50 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE2-LABEL: 'test_vXi16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi16' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX512F-LABEL: 'test_vXi16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX512BW-LABEL: 'test_vXi16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX512VBMI-LABEL: 'test_vXi16' -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX512-LABEL: 'test_vXi16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> @@ -332,11 +300,11 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) { ; SSE2-LABEL: 'test_vXi8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi8' @@ -344,8 +312,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi8' @@ -353,8 +321,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi8' @@ -362,8 +330,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi8' @@ -371,36 +339,18 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX512F-LABEL: 'test_vXi8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX512BW-LABEL: 'test_vXi8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX512VBMI-LABEL: 'test_vXi8' -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX512-LABEL: 'test_vXi8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> @@ -415,10 +365,10 @@ define void @test_vXi1(<2 x i1> %src2, <4 x i1> %src4, <8 x i1> %src8, <16 x i1> ; SSE2-LABEL: 'test_vXi1' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi1' @@ -426,8 +376,8 @@ define void @test_vXi1(<2 x i1> %src2, <4 x i1> %src4, <8 x i1> %src8, <16 x i1> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi1' @@ -435,8 +385,8 @@ define void @test_vXi1(<2 x i1> %src2, <4 x i1> %src4, <8 x i1> %src8, <16 x i1> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi1' @@ -444,8 +394,8 @@ define void @test_vXi1(<2 x i1> %src2, <4 x i1> %src4, <8 x i1> %src8, <16 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi1' @@ -453,17 +403,17 @@ define void @test_vXi1(<2 x i1> %src2, <4 x i1> %src4, <8 x i1> %src8, <16 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi1' -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> @@ -483,29 +433,29 @@ define void @test_upper_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a ; SSE-LABEL: 'test_upper_vXf32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_upper_vXf32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_upper_vXf32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_upper_vXf32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-splat-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-splat-sizelatency.ll index 848e7b4e611a7..56d8cadb18e48 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-splat-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-splat-sizelatency.ll @@ -4,9 +4,9 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx | FileCheck %s -check-prefixes=AVX1 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512 ; ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 @@ -19,20 +19,20 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512) { ; SSE-LABEL: 'test_vXf64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXf64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXf64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf64' @@ -50,20 +50,20 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) { ; SSE-LABEL: 'test_vXi64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi64' @@ -82,22 +82,22 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; SSE-LABEL: 'test_vXf32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXf32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXf32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf32' @@ -118,22 +118,22 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; SSE-LABEL: 'test_vXi32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi32' @@ -154,58 +154,42 @@ define void @test_vXf16(<2 x half> %src32, <4 x half> %src64, <8 x half> %src128 ; SSE2-LABEL: 'test_vXf16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXf16' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'test_vXf16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXf16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX512F-LABEL: 'test_vXf16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX512BW-LABEL: 'test_vXf16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX512VBMI-LABEL: 'test_vXf16' -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX512-LABEL: 'test_vXf16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> @@ -260,66 +244,50 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE2-LABEL: 'test_vXi16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi16' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX512F-LABEL: 'test_vXi16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX512BW-LABEL: 'test_vXi16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX512VBMI-LABEL: 'test_vXi16' -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX512-LABEL: 'test_vXi16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> @@ -332,11 +300,11 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) { ; SSE2-LABEL: 'test_vXi8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi8' @@ -344,8 +312,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi8' @@ -353,8 +321,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi8' @@ -362,8 +330,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi8' @@ -371,36 +339,18 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX512F-LABEL: 'test_vXi8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX512BW-LABEL: 'test_vXi8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; AVX512VBMI-LABEL: 'test_vXi8' -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX512-LABEL: 'test_vXi8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> @@ -415,10 +365,10 @@ define void @test_vXi1(<2 x i1> %src2, <4 x i1> %src4, <8 x i1> %src8, <16 x i1> ; SSE2-LABEL: 'test_vXi1' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi1' @@ -426,8 +376,8 @@ define void @test_vXi1(<2 x i1> %src2, <4 x i1> %src4, <8 x i1> %src8, <16 x i1> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi1' @@ -435,8 +385,8 @@ define void @test_vXi1(<2 x i1> %src2, <4 x i1> %src4, <8 x i1> %src8, <16 x i1> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi1' @@ -444,8 +394,8 @@ define void @test_vXi1(<2 x i1> %src2, <4 x i1> %src4, <8 x i1> %src8, <16 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi1' @@ -453,17 +403,17 @@ define void @test_vXi1(<2 x i1> %src2, <4 x i1> %src4, <8 x i1> %src8, <16 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi1' -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> @@ -483,22 +433,22 @@ define void @test_upper_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a ; SSE-LABEL: 'test_upper_vXf32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_upper_vXf32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_upper_vXf32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_upper_vXf32' diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-splat.ll b/llvm/test/Analysis/CostModel/X86/shuffle-splat.ll index 4c6d1ccd5ca34..56f56c3c2942a 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-splat.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-splat.ll @@ -4,9 +4,9 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx | FileCheck %s -check-prefixes=AVX1 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx2 | FileCheck %s -check-prefixes=AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512 ; ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 @@ -19,20 +19,20 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512) { ; SSE-LABEL: 'test_vXf64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'test_vXf64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'test_vXf64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf64' @@ -50,20 +50,20 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) { ; SSE-LABEL: 'test_vXi64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi64' @@ -82,22 +82,22 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; SSE-LABEL: 'test_vXf32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'test_vXf32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'test_vXf32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf32' @@ -118,22 +118,22 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; SSE-LABEL: 'test_vXi32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi32' @@ -154,58 +154,42 @@ define void @test_vXf16(<2 x half> %src32, <4 x half> %src64, <8 x half> %src128 ; SSE2-LABEL: 'test_vXf16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXf16' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'test_vXf16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'test_vXf16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX512F-LABEL: 'test_vXf16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; AVX512BW-LABEL: 'test_vXf16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; AVX512VBMI-LABEL: 'test_vXf16' -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX512-LABEL: 'test_vXf16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> @@ -260,66 +244,50 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE2-LABEL: 'test_vXi16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi16' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX512F-LABEL: 'test_vXi16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; AVX512BW-LABEL: 'test_vXi16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; AVX512VBMI-LABEL: 'test_vXi16' -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX512-LABEL: 'test_vXi16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> @@ -332,11 +300,11 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) { ; SSE2-LABEL: 'test_vXi8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi8' @@ -344,8 +312,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi8' @@ -353,8 +321,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi8' @@ -362,8 +330,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi8' @@ -371,36 +339,18 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX512F-LABEL: 'test_vXi8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; AVX512BW-LABEL: 'test_vXi8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; AVX512VBMI-LABEL: 'test_vXi8' -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX512-LABEL: 'test_vXi8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> @@ -415,10 +365,10 @@ define void @test_vXi1(<2 x i1> %src2, <4 x i1> %src4, <8 x i1> %src8, <16 x i1> ; SSE2-LABEL: 'test_vXi1' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi1' @@ -426,8 +376,8 @@ define void @test_vXi1(<2 x i1> %src2, <4 x i1> %src4, <8 x i1> %src8, <16 x i1> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi1' @@ -435,8 +385,8 @@ define void @test_vXi1(<2 x i1> %src2, <4 x i1> %src4, <8 x i1> %src8, <16 x i1> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi1' @@ -444,8 +394,8 @@ define void @test_vXi1(<2 x i1> %src2, <4 x i1> %src4, <8 x i1> %src8, <16 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi1' @@ -453,17 +403,17 @@ define void @test_vXi1(<2 x i1> %src2, <4 x i1> %src4, <8 x i1> %src8, <16 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi1' -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> @@ -483,22 +433,22 @@ define void @test_upper_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a ; SSE-LABEL: 'test_upper_vXf32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'test_upper_vXf32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'test_upper_vXf32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_upper_vXf32' diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll index 289807a808d5d..3cab4a4da3f8e 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll @@ -48,12 +48,12 @@ define void @test() { ; CHECK-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP17]], %[[BB77]] ], [ [[TMP36:%.*]], %[[BB78]] ] ; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x float> [ [[TMP31]], %[[BB77]] ], [ [[TMP37:%.*]], %[[BB78]] ] ; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <16 x i32> -; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <16 x float> [[TMP19]], <16 x float> [[TMP20]], <16 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <16 x float> [[TMP21]], <16 x float> [[TMP20]], <16 x i32> ; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> -; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <16 x float> [[TMP21]], <16 x float> [[TMP22]], <16 x i32> -; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <16 x float> [[TMP23]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <16 x float> [[TMP23]], <16 x float> [[TMP22]], <16 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <16 x float> [[TMP40]], <16 x float> poison, <16 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = fmul fast <16 x float> [[TMP24]], [[TMP13]] ; CHECK-NEXT: [[TMP26:%.*]] = fmul fast <16 x float> [[TMP38]], [[TMP25]] ; CHECK-NEXT: [[TMP27:%.*]] = fadd fast <16 x float> [[TMP26]], [[TMP18]] diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/scalarization-overhead.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/scalarization-overhead.ll index 8093285ad8717..a504f3ed02014 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/scalarization-overhead.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/scalarization-overhead.ll @@ -8,56 +8,34 @@ define fastcc i64 @zot(float %arg, float %arg1, float %arg2, float %arg3, float %arg4, ptr %arg5, i1 %arg6, i1 %arg7, i1 %arg8) { ; CHECK-LABEL: @zot( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[VAL:%.*]] = fmul fast float 0.000000e+00, 0.000000e+00 ; CHECK-NEXT: [[VAL9:%.*]] = fmul fast float 0.000000e+00, [[ARG:%.*]] -; CHECK-NEXT: [[VAL10:%.*]] = fmul fast float [[ARG3:%.*]], 1.000000e+00 -; CHECK-NEXT: [[VAL11:%.*]] = fmul fast float [[ARG3]], 1.000000e+00 -; CHECK-NEXT: [[VAL12:%.*]] = fadd fast float [[ARG3]], 1.000000e+00 -; CHECK-NEXT: [[VAL13:%.*]] = fadd fast float [[VAL12]], 2.000000e+00 -; CHECK-NEXT: [[VAL14:%.*]] = fadd fast float 0.000000e+00, 0.000000e+00 -; CHECK-NEXT: [[VAL15:%.*]] = fadd fast float [[VAL14]], 1.000000e+00 -; CHECK-NEXT: [[VAL16:%.*]] = fadd fast float [[ARG3]], 1.000000e+00 -; CHECK-NEXT: [[VAL17:%.*]] = fadd fast float [[ARG3]], 1.000000e+00 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> , float [[ARG]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[ARG3:%.*]], i32 2 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> , [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> , float [[ARG3]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = fadd fast <2 x float> [[TMP4]], +; CHECK-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP2]], <2 x float> [[TMP5]], i64 0) +; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[TMP6]], ; CHECK-NEXT: br i1 [[ARG6:%.*]], label [[BB18:%.*]], label [[BB57:%.*]] ; CHECK: bb18: -; CHECK-NEXT: [[VAL19:%.*]] = phi float [ [[VAL13]], [[BB:%.*]] ] -; CHECK-NEXT: [[VAL20:%.*]] = phi float [ [[VAL15]], [[BB]] ] -; CHECK-NEXT: [[VAL21:%.*]] = phi float [ [[VAL16]], [[BB]] ] -; CHECK-NEXT: [[VAL22:%.*]] = phi float [ [[VAL17]], [[BB]] ] +; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x float> [ [[TMP7]], [[BB:%.*]] ] +; CHECK-NEXT: [[VAL16:%.*]] = extractelement <4 x float> [[TMP7]], i32 2 ; CHECK-NEXT: [[VAL23:%.*]] = fmul fast float [[VAL16]], 2.000000e+00 +; CHECK-NEXT: [[VAL17:%.*]] = extractelement <4 x float> [[TMP7]], i32 3 ; CHECK-NEXT: [[VAL24:%.*]] = fmul fast float [[VAL17]], 3.000000e+00 ; CHECK-NEXT: br i1 [[ARG7:%.*]], label [[BB25:%.*]], label [[BB57]] ; CHECK: bb25: -; CHECK-NEXT: [[VAL26:%.*]] = phi float [ [[VAL19]], [[BB18]] ] -; CHECK-NEXT: [[VAL27:%.*]] = phi float [ [[VAL20]], [[BB18]] ] -; CHECK-NEXT: [[VAL28:%.*]] = phi float [ [[VAL21]], [[BB18]] ] -; CHECK-NEXT: [[VAL29:%.*]] = phi float [ [[VAL22]], [[BB18]] ] +; CHECK-NEXT: [[TMP11:%.*]] = phi <4 x float> [ [[TMP8]], [[BB18]] ] ; CHECK-NEXT: br label [[BB30:%.*]] ; CHECK: bb30: ; CHECK-NEXT: [[VAL31:%.*]] = phi float [ [[VAL55:%.*]], [[BB30]] ], [ 0.000000e+00, [[BB25]] ] ; CHECK-NEXT: [[VAL32:%.*]] = phi float [ [[VAL9]], [[BB30]] ], [ 0.000000e+00, [[BB25]] ] -; CHECK-NEXT: [[VAL33:%.*]] = load i8, ptr [[ARG5:%.*]], align 1 -; CHECK-NEXT: [[VAL34:%.*]] = uitofp i8 [[VAL33]] to float -; CHECK-NEXT: [[VAL35:%.*]] = getelementptr inbounds i8, ptr [[ARG5]], i64 1 -; CHECK-NEXT: [[VAL36:%.*]] = load i8, ptr [[VAL35]], align 1 -; CHECK-NEXT: [[VAL37:%.*]] = uitofp i8 [[VAL36]] to float -; CHECK-NEXT: [[VAL38:%.*]] = getelementptr inbounds i8, ptr [[ARG5]], i64 2 -; CHECK-NEXT: [[VAL39:%.*]] = load i8, ptr [[VAL38]], align 1 -; CHECK-NEXT: [[VAL40:%.*]] = uitofp i8 [[VAL39]] to float -; CHECK-NEXT: [[VAL41:%.*]] = getelementptr inbounds i8, ptr [[ARG5]], i64 3 -; CHECK-NEXT: [[VAL42:%.*]] = load i8, ptr [[VAL41]], align 1 -; CHECK-NEXT: [[VAL43:%.*]] = uitofp i8 [[VAL42]] to float -; CHECK-NEXT: [[VAL44:%.*]] = fsub fast float [[VAL34]], [[VAL]] -; CHECK-NEXT: [[VAL45:%.*]] = fsub fast float [[VAL37]], [[VAL9]] -; CHECK-NEXT: [[VAL46:%.*]] = fsub fast float [[VAL40]], [[VAL10]] -; CHECK-NEXT: [[VAL47:%.*]] = fsub fast float [[VAL43]], [[VAL11]] -; CHECK-NEXT: [[VAL48:%.*]] = fmul fast float [[VAL44]], [[VAL26]] -; CHECK-NEXT: [[VAL49:%.*]] = fmul fast float [[VAL45]], [[VAL27]] -; CHECK-NEXT: [[VAL50:%.*]] = fadd fast float [[VAL49]], [[VAL48]] -; CHECK-NEXT: [[VAL51:%.*]] = fmul fast float [[VAL46]], [[VAL28]] -; CHECK-NEXT: [[VAL52:%.*]] = fadd fast float [[VAL50]], [[VAL51]] -; CHECK-NEXT: [[VAL53:%.*]] = fmul fast float [[VAL47]], [[VAL29]] -; CHECK-NEXT: [[VAL54:%.*]] = fadd fast float [[VAL52]], [[VAL53]] +; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i8>, ptr [[ARG5:%.*]], align 1 +; CHECK-NEXT: [[TMP13:%.*]] = uitofp <4 x i8> [[TMP12]] to <4 x float> +; CHECK-NEXT: [[TMP14:%.*]] = fsub fast <4 x float> [[TMP13]], [[TMP3]] +; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <4 x float> [[TMP14]], [[TMP11]] +; CHECK-NEXT: [[VAL54:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP15]]) ; CHECK-NEXT: [[VAL55]] = tail call fast float @llvm.minnum.f32(float [[VAL31]], float [[ARG1:%.*]]) ; CHECK-NEXT: [[VAL56:%.*]] = tail call fast float @llvm.maxnum.f32(float [[ARG2:%.*]], float [[VAL54]]) ; CHECK-NEXT: call void @ham(float [[VAL55]], float [[VAL56]]) diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll index 912d60d0cc386..257e4660c80aa 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll @@ -30,11 +30,11 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[ARRAYIDX5_2:%.*]] = getelementptr i8, ptr [[ADD_PTR64_1]], i64 4 ; CHECK-NEXT: [[ARRAYIDX8_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 1 ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1 -; CHECK-NEXT: [[TMP33:%.*]] = load i8, ptr [[ARRAYIDX8_2]], align 1 -; CHECK-NEXT: [[TMP29:%.*]] = load i8, ptr [[ADD_PTR_1]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX8_2]], align 1 +; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[ADD_PTR_1]], align 1 ; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <2 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = zext <2 x i8> [[TMP19]] to <2 x i32> -; CHECK-NEXT: [[TMP84:%.*]] = zext i8 [[TMP29]] to i32 +; CHECK-NEXT: [[CONV_2:%.*]] = zext i8 [[TMP6]] to i32 ; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1 ; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <2 x i32> ; CHECK-NEXT: [[TMP31:%.*]] = zext <2 x i8> [[TMP22]] to <2 x i32> @@ -50,7 +50,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP30:%.*]] = add <2 x i32> [[TMP25]], [[TMP23]] ; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <2 x i32> ; CHECK-NEXT: [[TMP51:%.*]] = zext <2 x i8> [[TMP32]] to <2 x i32> -; CHECK-NEXT: [[TMP83:%.*]] = zext i8 [[TMP33]] to i32 +; CHECK-NEXT: [[CONV9_2:%.*]] = zext i8 [[TMP7]] to i32 ; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <2 x i32> ; CHECK-NEXT: [[TMP57:%.*]] = zext <2 x i8> [[TMP56]] to <2 x i32> ; CHECK-NEXT: [[TMP35:%.*]] = sub <2 x i32> [[TMP51]], [[TMP57]] @@ -61,14 +61,14 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP36:%.*]] = sub <2 x i32> [[TMP39]], [[TMP61]] ; CHECK-NEXT: [[TMP37:%.*]] = shl <2 x i32> [[TMP36]], splat (i32 16) ; CHECK-NEXT: [[TMP42:%.*]] = add <2 x i32> [[TMP37]], [[TMP35]] -; CHECK-NEXT: [[TMP43:%.*]] = add <2 x i32> [[TMP42]], [[TMP30]] +; CHECK-NEXT: [[TMP34:%.*]] = add <2 x i32> [[TMP42]], [[TMP30]] ; CHECK-NEXT: [[TMP44:%.*]] = sub <2 x i32> [[TMP30]], [[TMP42]] -; CHECK-NEXT: [[TMP73:%.*]] = extractelement <2 x i32> [[TMP43]], i32 0 -; CHECK-NEXT: [[TMP34:%.*]] = extractelement <2 x i32> [[TMP43]], i32 1 -; CHECK-NEXT: [[ADD48_2:%.*]] = add i32 [[TMP34]], [[TMP73]] -; CHECK-NEXT: [[TMP47:%.*]] = extractelement <2 x i32> [[TMP44]], i32 0 -; CHECK-NEXT: [[TMP48:%.*]] = extractelement <2 x i32> [[TMP44]], i32 1 -; CHECK-NEXT: [[ADD55_2:%.*]] = add i32 [[TMP48]], [[TMP47]] +; CHECK-NEXT: [[TMP43:%.*]] = extractelement <2 x i32> [[TMP34]], i32 0 +; CHECK-NEXT: [[TMP45:%.*]] = extractelement <2 x i32> [[TMP34]], i32 1 +; CHECK-NEXT: [[ADD48_2:%.*]] = add i32 [[TMP45]], [[TMP43]] +; CHECK-NEXT: [[TMP46:%.*]] = extractelement <2 x i32> [[TMP44]], i32 0 +; CHECK-NEXT: [[TMP47:%.*]] = extractelement <2 x i32> [[TMP44]], i32 1 +; CHECK-NEXT: [[ADD55_2:%.*]] = add i32 [[TMP47]], [[TMP46]] ; CHECK-NEXT: [[ARRAYIDX5_3:%.*]] = getelementptr i8, ptr null, i64 4 ; CHECK-NEXT: [[TMP53:%.*]] = load <2 x i8>, ptr null, align 1 ; CHECK-NEXT: [[TMP52:%.*]] = load i8, ptr null, align 1 @@ -79,12 +79,12 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP59:%.*]] = sub <2 x i32> [[TMP62]], [[TMP55]] ; CHECK-NEXT: [[TMP41:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> splat (i1 true), i32 2) ; CHECK-NEXT: [[TMP58:%.*]] = zext <2 x i8> [[TMP41]] to <2 x i32> -; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <2 x i32> [[TMP58]], <2 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <2 x i32> [[TMP58]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP63:%.*]] = load <2 x i8>, ptr [[ARRAYIDX5_3]], align 1 ; CHECK-NEXT: [[TMP76:%.*]] = zext <2 x i8> [[TMP63]] to <2 x i32> -; CHECK-NEXT: [[TMP45:%.*]] = sub <2 x i32> [[TMP60]], [[TMP76]] -; CHECK-NEXT: [[TMP46:%.*]] = shl <2 x i32> [[TMP45]], splat (i32 16) -; CHECK-NEXT: [[TMP90:%.*]] = add <2 x i32> [[TMP46]], [[TMP59]] +; CHECK-NEXT: [[TMP81:%.*]] = sub <2 x i32> [[TMP48]], [[TMP76]] +; CHECK-NEXT: [[TMP167:%.*]] = shl <2 x i32> [[TMP81]], splat (i32 16) +; CHECK-NEXT: [[TMP75:%.*]] = add <2 x i32> [[TMP167]], [[TMP59]] ; CHECK-NEXT: [[ARRAYIDX20_3:%.*]] = getelementptr i8, ptr null, i64 2 ; CHECK-NEXT: [[ARRAYIDX22_3:%.*]] = getelementptr i8, ptr null, i64 2 ; CHECK-NEXT: [[ARRAYIDX27_3:%.*]] = getelementptr i8, ptr null, i64 6 @@ -93,236 +93,236 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP82:%.*]] = load <2 x i8>, ptr [[ARRAYIDX22_3]], align 1 ; CHECK-NEXT: [[TMP91:%.*]] = zext <2 x i8> [[TMP82]] to <2 x i32> ; CHECK-NEXT: [[TMP65:%.*]] = sub <2 x i32> [[TMP79]], [[TMP91]] -; CHECK-NEXT: [[TMP75:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> zeroinitializer, i32 1, <2 x i1> splat (i1 true), <2 x i8> poison) -; CHECK-NEXT: [[TMP98:%.*]] = zext <2 x i8> [[TMP75]] to <2 x i32> -; CHECK-NEXT: [[TMP100:%.*]] = load <2 x i8>, ptr [[ARRAYIDX27_3]], align 1 -; CHECK-NEXT: [[TMP103:%.*]] = zext <2 x i8> [[TMP100]] to <2 x i32> -; CHECK-NEXT: [[TMP69:%.*]] = sub <2 x i32> [[TMP98]], [[TMP103]] -; CHECK-NEXT: [[TMP70:%.*]] = shl <2 x i32> [[TMP69]], splat (i32 16) -; CHECK-NEXT: [[TMP74:%.*]] = add <2 x i32> [[TMP70]], [[TMP65]] -; CHECK-NEXT: [[TMP78:%.*]] = extractelement <2 x i32> [[TMP90]], i32 0 -; CHECK-NEXT: [[TMP71:%.*]] = extractelement <2 x i32> [[TMP90]], i32 1 -; CHECK-NEXT: [[ADD48_3:%.*]] = add i32 [[TMP71]], [[TMP78]] -; CHECK-NEXT: [[SUB51_3:%.*]] = sub i32 [[TMP78]], [[TMP71]] -; CHECK-NEXT: [[TMP80:%.*]] = extractelement <2 x i32> [[TMP74]], i32 0 -; CHECK-NEXT: [[TMP81:%.*]] = extractelement <2 x i32> [[TMP74]], i32 1 -; CHECK-NEXT: [[ADD55_3:%.*]] = add i32 [[TMP81]], [[TMP80]] -; CHECK-NEXT: [[TMP107:%.*]] = sub i32 [[TMP80]], [[TMP81]] -; CHECK-NEXT: [[ADD48_4:%.*]] = add i32 [[ADD55_3]], [[ADD48_3]] -; CHECK-NEXT: [[TMP113:%.*]] = shufflevector <2 x i32> [[TMP43]], <2 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP122:%.*]] = insertelement <2 x i32> [[TMP113]], i32 [[ADD48_3]], i32 0 -; CHECK-NEXT: [[TMP72:%.*]] = insertelement <2 x i32> [[TMP43]], i32 [[ADD55_3]], i32 0 -; CHECK-NEXT: [[TMP123:%.*]] = sub <2 x i32> [[TMP122]], [[TMP72]] -; CHECK-NEXT: [[ADD55_4:%.*]] = add i32 [[TMP107]], [[SUB51_3]] -; CHECK-NEXT: [[TMP126:%.*]] = shufflevector <2 x i32> [[TMP44]], <2 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP129:%.*]] = insertelement <2 x i32> [[TMP126]], i32 [[SUB51_3]], i32 0 -; CHECK-NEXT: [[TMP130:%.*]] = insertelement <2 x i32> [[TMP44]], i32 [[TMP107]], i32 0 -; CHECK-NEXT: [[TMP143:%.*]] = sub <2 x i32> [[TMP129]], [[TMP130]] -; CHECK-NEXT: [[ADD94:%.*]] = add i32 [[ADD48_4]], [[ADD48_2]] -; CHECK-NEXT: [[SUB102:%.*]] = sub i32 [[ADD48_2]], [[ADD48_4]] -; CHECK-NEXT: [[SHR_I49_3:%.*]] = lshr i32 [[TMP77]], 15 -; CHECK-NEXT: [[AND_I50_3:%.*]] = and i32 [[SHR_I49_3]], 65537 -; CHECK-NEXT: [[MUL_I51_3:%.*]] = mul i32 [[AND_I50_3]], 65535 -; CHECK-NEXT: [[SHR_I_1:%.*]] = lshr i32 [[TMP34]], 15 +; CHECK-NEXT: [[TMP170:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> zeroinitializer, i32 1, <2 x i1> splat (i1 true), <2 x i8> poison) +; CHECK-NEXT: [[TMP171:%.*]] = zext <2 x i8> [[TMP170]] to <2 x i32> +; CHECK-NEXT: [[TMP172:%.*]] = load <2 x i8>, ptr [[ARRAYIDX27_3]], align 1 +; CHECK-NEXT: [[TMP173:%.*]] = zext <2 x i8> [[TMP172]] to <2 x i32> +; CHECK-NEXT: [[TMP66:%.*]] = sub <2 x i32> [[TMP171]], [[TMP173]] +; CHECK-NEXT: [[TMP67:%.*]] = shl <2 x i32> [[TMP66]], splat (i32 16) +; CHECK-NEXT: [[TMP69:%.*]] = add <2 x i32> [[TMP67]], [[TMP65]] +; CHECK-NEXT: [[TMP176:%.*]] = extractelement <2 x i32> [[TMP75]], i32 0 +; CHECK-NEXT: [[TMP197:%.*]] = extractelement <2 x i32> [[TMP75]], i32 1 +; CHECK-NEXT: [[SUB59:%.*]] = add i32 [[TMP197]], [[TMP176]] +; CHECK-NEXT: [[SUB45_3:%.*]] = sub i32 [[TMP176]], [[TMP197]] +; CHECK-NEXT: [[ADD112_2:%.*]] = extractelement <2 x i32> [[TMP69]], i32 0 +; CHECK-NEXT: [[XOR_I63_2:%.*]] = extractelement <2 x i32> [[TMP69]], i32 1 +; CHECK-NEXT: [[SUB59_1:%.*]] = add i32 [[XOR_I63_2]], [[ADD112_2]] +; CHECK-NEXT: [[SUB47_3:%.*]] = sub i32 [[ADD112_2]], [[XOR_I63_2]] +; CHECK-NEXT: [[ADD94:%.*]] = add i32 [[SUB59_1]], [[SUB59]] +; CHECK-NEXT: [[TMP70:%.*]] = shufflevector <2 x i32> [[TMP34]], <2 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP71:%.*]] = insertelement <2 x i32> [[TMP70]], i32 [[SUB59]], i32 0 +; CHECK-NEXT: [[TMP72:%.*]] = insertelement <2 x i32> [[TMP34]], i32 [[SUB59_1]], i32 0 +; CHECK-NEXT: [[TMP222:%.*]] = sub <2 x i32> [[TMP71]], [[TMP72]] +; CHECK-NEXT: [[ADD55_3:%.*]] = add i32 [[SUB47_3]], [[SUB45_3]] +; CHECK-NEXT: [[TMP74:%.*]] = shufflevector <2 x i32> [[TMP44]], <2 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP78:%.*]] = insertelement <2 x i32> [[TMP74]], i32 [[SUB45_3]], i32 0 +; CHECK-NEXT: [[TMP80:%.*]] = insertelement <2 x i32> [[TMP44]], i32 [[SUB47_3]], i32 0 +; CHECK-NEXT: [[TMP85:%.*]] = sub <2 x i32> [[TMP78]], [[TMP80]] +; CHECK-NEXT: [[ADD95:%.*]] = add i32 [[ADD94]], [[ADD48_2]] +; CHECK-NEXT: [[SUB86_3:%.*]] = sub i32 [[ADD48_2]], [[ADD94]] +; CHECK-NEXT: [[SHR_I:%.*]] = lshr i32 [[TMP77]], 15 +; CHECK-NEXT: [[AND_I:%.*]] = and i32 [[SHR_I]], 65537 +; CHECK-NEXT: [[MUL_I:%.*]] = mul i32 [[AND_I]], 65535 +; CHECK-NEXT: [[SHR_I49:%.*]] = lshr i32 [[TMP45]], 15 +; CHECK-NEXT: [[AND_I50:%.*]] = and i32 [[SHR_I49]], 65537 +; CHECK-NEXT: [[MUL_I51:%.*]] = mul i32 [[AND_I50]], 65535 +; CHECK-NEXT: [[ADD94_1:%.*]] = add i32 [[ADD55_3]], [[ADD55_2]] +; CHECK-NEXT: [[SUB102_1:%.*]] = sub i32 [[ADD55_2]], [[ADD55_3]] +; CHECK-NEXT: [[SHR_I_1:%.*]] = lshr i32 [[CONV9_2]], 15 ; CHECK-NEXT: [[AND_I_1:%.*]] = and i32 [[SHR_I_1]], 65537 ; CHECK-NEXT: [[MUL_I_1:%.*]] = mul i32 [[AND_I_1]], 65535 -; CHECK-NEXT: [[ADD94_5:%.*]] = add i32 [[ADD55_4]], [[ADD55_2]] -; CHECK-NEXT: [[SUB102_1:%.*]] = sub i32 [[ADD55_2]], [[ADD55_4]] -; CHECK-NEXT: [[SHR_I_2:%.*]] = lshr i32 [[TMP83]], 15 -; CHECK-NEXT: [[AND_I_2:%.*]] = and i32 [[SHR_I_2]], 65537 -; CHECK-NEXT: [[MUL_I_2:%.*]] = mul i32 [[AND_I_2]], 65535 -; CHECK-NEXT: [[SHR_I49_1:%.*]] = lshr i32 [[TMP84]], 15 +; CHECK-NEXT: [[SHR_I49_1:%.*]] = lshr i32 [[CONV_2]], 15 ; CHECK-NEXT: [[AND_I50_1:%.*]] = and i32 [[SHR_I49_1]], 65537 -; CHECK-NEXT: [[ADD94_2:%.*]] = mul i32 [[AND_I50_1]], 65535 -; CHECK-NEXT: [[TMP144:%.*]] = extractelement <2 x i32> [[TMP123]], i32 0 -; CHECK-NEXT: [[TMP145:%.*]] = extractelement <2 x i32> [[TMP123]], i32 1 -; CHECK-NEXT: [[ADD94_4:%.*]] = add i32 [[TMP144]], [[TMP145]] -; CHECK-NEXT: [[TMP169:%.*]] = sub i32 [[TMP145]], [[TMP144]] +; CHECK-NEXT: [[MUL_I51_1:%.*]] = mul i32 [[AND_I50_1]], 65535 +; CHECK-NEXT: [[TMP86:%.*]] = extractelement <2 x i32> [[TMP222]], i32 0 +; CHECK-NEXT: [[TMP87:%.*]] = extractelement <2 x i32> [[TMP222]], i32 1 +; CHECK-NEXT: [[ADD94_3:%.*]] = add i32 [[TMP86]], [[TMP87]] +; CHECK-NEXT: [[ADD112_1:%.*]] = sub i32 [[TMP87]], [[TMP86]] ; CHECK-NEXT: [[SHR_I49_2:%.*]] = lshr i32 [[CONV_1]], 15 ; CHECK-NEXT: [[AND_I50_2:%.*]] = and i32 [[SHR_I49_2]], 65537 ; CHECK-NEXT: [[MUL_I51_2:%.*]] = mul i32 [[AND_I50_2]], 65535 -; CHECK-NEXT: [[TMP146:%.*]] = extractelement <2 x i32> [[TMP143]], i32 0 -; CHECK-NEXT: [[TMP147:%.*]] = extractelement <2 x i32> [[TMP143]], i32 1 -; CHECK-NEXT: [[ADD94_3:%.*]] = add i32 [[TMP146]], [[TMP147]] -; CHECK-NEXT: [[SUB102_3:%.*]] = sub i32 [[TMP147]], [[TMP146]] -; CHECK-NEXT: [[SHR_I49_4:%.*]] = lshr i32 [[CONV1]], 15 -; CHECK-NEXT: [[AND_I50_4:%.*]] = and i32 [[SHR_I49_4]], 65537 -; CHECK-NEXT: [[MUL_I51_4:%.*]] = mul i32 [[AND_I50_4]], 65535 -; CHECK-NEXT: [[TMP66:%.*]] = load <2 x i8>, ptr [[ARRAYIDX8]], align 1 -; CHECK-NEXT: [[TMP102:%.*]] = zext <2 x i8> [[TMP66]] to <2 x i32> -; CHECK-NEXT: [[TMP148:%.*]] = load <4 x i8>, ptr [[PIX2]], align 1 -; CHECK-NEXT: [[TMP67:%.*]] = shufflevector <4 x i8> [[TMP148]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP85:%.*]] = zext <2 x i8> [[TMP67]] to <2 x i32> -; CHECK-NEXT: [[TMP149:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 -; CHECK-NEXT: [[TMP106:%.*]] = shufflevector <4 x i8> [[TMP149]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP108:%.*]] = zext <2 x i8> [[TMP106]] to <2 x i32> -; CHECK-NEXT: [[TMP150:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5]], align 1 -; CHECK-NEXT: [[TMP109:%.*]] = shufflevector <4 x i8> [[TMP150]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP89:%.*]] = zext <2 x i8> [[TMP109]] to <2 x i32> -; CHECK-NEXT: [[TMP87:%.*]] = sub <2 x i32> [[TMP108]], [[TMP89]] -; CHECK-NEXT: [[TMP88:%.*]] = shl <2 x i32> [[TMP87]], splat (i32 16) -; CHECK-NEXT: [[TMP112:%.*]] = shufflevector <4 x i8> [[TMP148]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP120:%.*]] = zext <2 x i8> [[TMP112]] to <2 x i32> -; CHECK-NEXT: [[TMP94:%.*]] = shufflevector <4 x i8> [[TMP149]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP128:%.*]] = zext <2 x i8> [[TMP94]] to <2 x i32> -; CHECK-NEXT: [[TMP131:%.*]] = shufflevector <4 x i8> [[TMP150]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP132:%.*]] = zext <2 x i8> [[TMP131]] to <2 x i32> -; CHECK-NEXT: [[TMP95:%.*]] = sub <2 x i32> [[TMP128]], [[TMP132]] -; CHECK-NEXT: [[TMP96:%.*]] = shl <2 x i32> [[TMP95]], splat (i32 16) -; CHECK-NEXT: [[TMP97:%.*]] = insertelement <2 x i32> [[TMP102]], i32 [[CONV33]], i32 1 -; CHECK-NEXT: [[TMP117:%.*]] = sub <2 x i32> [[TMP97]], [[TMP120]] -; CHECK-NEXT: [[TMP105:%.*]] = add <2 x i32> [[TMP96]], [[TMP117]] -; CHECK-NEXT: [[TMP86:%.*]] = insertelement <2 x i32> [[TMP102]], i32 [[CONV1]], i32 0 -; CHECK-NEXT: [[TMP119:%.*]] = sub <2 x i32> [[TMP86]], [[TMP85]] -; CHECK-NEXT: [[TMP92:%.*]] = add <2 x i32> [[TMP88]], [[TMP119]] -; CHECK-NEXT: [[TMP93:%.*]] = shufflevector <2 x i32> [[TMP105]], <2 x i32> [[TMP92]], <2 x i32> -; CHECK-NEXT: [[TMP101:%.*]] = add <2 x i32> [[TMP105]], [[TMP92]] -; CHECK-NEXT: [[TMP151:%.*]] = sub <2 x i32> [[TMP92]], [[TMP105]] -; CHECK-NEXT: [[TMP111:%.*]] = extractelement <2 x i32> [[TMP101]], i32 0 -; CHECK-NEXT: [[TMP99:%.*]] = extractelement <2 x i32> [[TMP101]], i32 1 -; CHECK-NEXT: [[ADD55:%.*]] = add i32 [[TMP99]], [[TMP111]] -; CHECK-NEXT: [[SUB51:%.*]] = sub i32 [[TMP111]], [[TMP99]] -; CHECK-NEXT: [[TMP153:%.*]] = extractelement <2 x i32> [[TMP151]], i32 0 -; CHECK-NEXT: [[TMP157:%.*]] = extractelement <2 x i32> [[TMP151]], i32 1 -; CHECK-NEXT: [[ADD78_1:%.*]] = add i32 [[TMP157]], [[TMP153]] -; CHECK-NEXT: [[SUB59:%.*]] = sub i32 [[TMP153]], [[TMP157]] -; CHECK-NEXT: [[SHR_I59_1:%.*]] = lshr i32 [[TMP99]], 15 +; CHECK-NEXT: [[TMP88:%.*]] = extractelement <2 x i32> [[TMP85]], i32 0 +; CHECK-NEXT: [[TMP89:%.*]] = extractelement <2 x i32> [[TMP85]], i32 1 +; CHECK-NEXT: [[ADD94_4:%.*]] = add i32 [[TMP88]], [[TMP89]] +; CHECK-NEXT: [[SUB102_3:%.*]] = sub i32 [[TMP89]], [[TMP88]] +; CHECK-NEXT: [[SHR_I49_3:%.*]] = lshr i32 [[CONV1]], 15 +; CHECK-NEXT: [[AND_I50_3:%.*]] = and i32 [[SHR_I49_3]], 65537 +; CHECK-NEXT: [[MUL_I51_3:%.*]] = mul i32 [[AND_I50_3]], 65535 +; CHECK-NEXT: [[TMP90:%.*]] = load <2 x i8>, ptr [[ARRAYIDX8]], align 1 +; CHECK-NEXT: [[TMP102:%.*]] = zext <2 x i8> [[TMP90]] to <2 x i32> +; CHECK-NEXT: [[TMP92:%.*]] = load <4 x i8>, ptr [[PIX2]], align 1 +; CHECK-NEXT: [[TMP93:%.*]] = shufflevector <4 x i8> [[TMP92]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP94:%.*]] = zext <2 x i8> [[TMP93]] to <2 x i32> +; CHECK-NEXT: [[TMP95:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 +; CHECK-NEXT: [[TMP96:%.*]] = shufflevector <4 x i8> [[TMP95]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP97:%.*]] = zext <2 x i8> [[TMP96]] to <2 x i32> +; CHECK-NEXT: [[TMP98:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5]], align 1 +; CHECK-NEXT: [[TMP99:%.*]] = shufflevector <4 x i8> [[TMP98]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP100:%.*]] = zext <2 x i8> [[TMP99]] to <2 x i32> +; CHECK-NEXT: [[TMP101:%.*]] = sub <2 x i32> [[TMP97]], [[TMP100]] +; CHECK-NEXT: [[TMP224:%.*]] = shl <2 x i32> [[TMP101]], splat (i32 16) +; CHECK-NEXT: [[TMP103:%.*]] = shufflevector <4 x i8> [[TMP92]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP104:%.*]] = zext <2 x i8> [[TMP103]] to <2 x i32> +; CHECK-NEXT: [[TMP105:%.*]] = shufflevector <4 x i8> [[TMP95]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP106:%.*]] = zext <2 x i8> [[TMP105]] to <2 x i32> +; CHECK-NEXT: [[TMP107:%.*]] = shufflevector <4 x i8> [[TMP98]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP108:%.*]] = zext <2 x i8> [[TMP107]] to <2 x i32> +; CHECK-NEXT: [[TMP109:%.*]] = sub <2 x i32> [[TMP106]], [[TMP108]] +; CHECK-NEXT: [[TMP110:%.*]] = shl <2 x i32> [[TMP109]], splat (i32 16) +; CHECK-NEXT: [[TMP111:%.*]] = insertelement <2 x i32> [[TMP102]], i32 [[CONV33]], i32 1 +; CHECK-NEXT: [[TMP112:%.*]] = sub <2 x i32> [[TMP111]], [[TMP104]] +; CHECK-NEXT: [[TMP113:%.*]] = add <2 x i32> [[TMP110]], [[TMP112]] +; CHECK-NEXT: [[TMP114:%.*]] = insertelement <2 x i32> [[TMP102]], i32 [[CONV1]], i32 0 +; CHECK-NEXT: [[TMP115:%.*]] = sub <2 x i32> [[TMP114]], [[TMP94]] +; CHECK-NEXT: [[TMP116:%.*]] = add <2 x i32> [[TMP224]], [[TMP115]] +; CHECK-NEXT: [[TMP117:%.*]] = shufflevector <2 x i32> [[TMP113]], <2 x i32> [[TMP116]], <2 x i32> +; CHECK-NEXT: [[TMP126:%.*]] = add <2 x i32> [[TMP113]], [[TMP116]] +; CHECK-NEXT: [[TMP119:%.*]] = sub <2 x i32> [[TMP116]], [[TMP113]] +; CHECK-NEXT: [[TMP120:%.*]] = extractelement <2 x i32> [[TMP126]], i32 0 +; CHECK-NEXT: [[TMP127:%.*]] = extractelement <2 x i32> [[TMP126]], i32 1 +; CHECK-NEXT: [[ADD48:%.*]] = add i32 [[TMP127]], [[TMP120]] +; CHECK-NEXT: [[TMP166:%.*]] = sub i32 [[TMP120]], [[TMP127]] +; CHECK-NEXT: [[TMP128:%.*]] = extractelement <2 x i32> [[TMP119]], i32 0 +; CHECK-NEXT: [[TMP129:%.*]] = extractelement <2 x i32> [[TMP119]], i32 1 +; CHECK-NEXT: [[ADD55:%.*]] = add i32 [[TMP129]], [[TMP128]] +; CHECK-NEXT: [[SUB60:%.*]] = sub i32 [[TMP128]], [[TMP129]] +; CHECK-NEXT: [[SHR_I59:%.*]] = lshr i32 [[TMP127]], 15 +; CHECK-NEXT: [[AND_I60:%.*]] = and i32 [[SHR_I59]], 65537 +; CHECK-NEXT: [[MUL_I61:%.*]] = mul i32 [[AND_I60]], 65535 +; CHECK-NEXT: [[SHR_I59_1:%.*]] = lshr i32 [[TMP129]], 15 ; CHECK-NEXT: [[AND_I60_1:%.*]] = and i32 [[SHR_I59_1]], 65537 ; CHECK-NEXT: [[MUL_I61_1:%.*]] = mul i32 [[AND_I60_1]], 65535 -; CHECK-NEXT: [[SHR_I59_4:%.*]] = lshr i32 [[TMP157]], 15 -; CHECK-NEXT: [[AND_I60_4:%.*]] = and i32 [[SHR_I59_4]], 65537 -; CHECK-NEXT: [[MUL_I61_4:%.*]] = mul i32 [[AND_I60_4]], 65535 -; CHECK-NEXT: [[TMP104:%.*]] = load <2 x i8>, ptr [[ARRAYIDX8_1]], align 1 -; CHECK-NEXT: [[TMP110:%.*]] = zext <2 x i8> [[TMP104]] to <2 x i32> -; CHECK-NEXT: [[TMP158:%.*]] = load <4 x i8>, ptr [[ADD_PTR644]], align 1 -; CHECK-NEXT: [[TMP114:%.*]] = shufflevector <4 x i8> [[TMP158]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP133:%.*]] = zext <2 x i8> [[TMP114]] to <2 x i32> -; CHECK-NEXT: [[TMP121:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1 -; CHECK-NEXT: [[TMP116:%.*]] = shufflevector <4 x i8> [[TMP121]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP115:%.*]] = zext <2 x i8> [[TMP116]] to <2 x i32> -; CHECK-NEXT: [[TMP159:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1 -; CHECK-NEXT: [[TMP118:%.*]] = shufflevector <4 x i8> [[TMP159]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP134:%.*]] = zext <2 x i8> [[TMP118]] to <2 x i32> -; CHECK-NEXT: [[TMP124:%.*]] = sub <2 x i32> [[TMP115]], [[TMP134]] -; CHECK-NEXT: [[TMP125:%.*]] = shl <2 x i32> [[TMP124]], splat (i32 16) -; CHECK-NEXT: [[TMP127:%.*]] = shufflevector <4 x i8> [[TMP158]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP191:%.*]] = zext <2 x i8> [[TMP127]] to <2 x i32> -; CHECK-NEXT: [[TMP160:%.*]] = shufflevector <4 x i8> [[TMP121]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP161:%.*]] = zext <2 x i8> [[TMP160]] to <2 x i32> -; CHECK-NEXT: [[TMP171:%.*]] = shufflevector <4 x i8> [[TMP159]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP172:%.*]] = zext <2 x i8> [[TMP171]] to <2 x i32> -; CHECK-NEXT: [[TMP135:%.*]] = sub <2 x i32> [[TMP161]], [[TMP172]] -; CHECK-NEXT: [[TMP136:%.*]] = shl <2 x i32> [[TMP135]], splat (i32 16) -; CHECK-NEXT: [[TMP137:%.*]] = insertelement <2 x i32> [[TMP110]], i32 [[CONV33_1]], i32 1 -; CHECK-NEXT: [[TMP173:%.*]] = sub <2 x i32> [[TMP137]], [[TMP191]] -; CHECK-NEXT: [[TMP174:%.*]] = add <2 x i32> [[TMP136]], [[TMP173]] -; CHECK-NEXT: [[TMP140:%.*]] = insertelement <2 x i32> [[TMP110]], i32 [[CONV_1]], i32 0 -; CHECK-NEXT: [[TMP141:%.*]] = sub <2 x i32> [[TMP140]], [[TMP133]] -; CHECK-NEXT: [[TMP192:%.*]] = add <2 x i32> [[TMP125]], [[TMP141]] -; CHECK-NEXT: [[TMP156:%.*]] = add <2 x i32> [[TMP174]], [[TMP192]] -; CHECK-NEXT: [[TMP155:%.*]] = sub <2 x i32> [[TMP192]], [[TMP174]] -; CHECK-NEXT: [[TMP139:%.*]] = extractelement <2 x i32> [[TMP156]], i32 0 -; CHECK-NEXT: [[TMP142:%.*]] = extractelement <2 x i32> [[TMP156]], i32 1 -; CHECK-NEXT: [[ADD48_1:%.*]] = add i32 [[TMP142]], [[TMP139]] -; CHECK-NEXT: [[SUB45_1:%.*]] = sub i32 [[TMP139]], [[TMP142]] -; CHECK-NEXT: [[TMP138:%.*]] = extractelement <2 x i32> [[TMP155]], i32 0 -; CHECK-NEXT: [[SUB47_1:%.*]] = extractelement <2 x i32> [[TMP155]], i32 1 -; CHECK-NEXT: [[ADD94_1:%.*]] = add i32 [[SUB47_1]], [[TMP138]] -; CHECK-NEXT: [[SUB59_1:%.*]] = sub i32 [[TMP138]], [[SUB47_1]] -; CHECK-NEXT: [[SHR_I54:%.*]] = lshr i32 [[TMP142]], 15 +; CHECK-NEXT: [[TMP130:%.*]] = load <2 x i8>, ptr [[ARRAYIDX8_1]], align 1 +; CHECK-NEXT: [[TMP131:%.*]] = zext <2 x i8> [[TMP130]] to <2 x i32> +; CHECK-NEXT: [[TMP132:%.*]] = load <4 x i8>, ptr [[ADD_PTR644]], align 1 +; CHECK-NEXT: [[TMP133:%.*]] = shufflevector <4 x i8> [[TMP132]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP134:%.*]] = zext <2 x i8> [[TMP133]] to <2 x i32> +; CHECK-NEXT: [[TMP135:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1 +; CHECK-NEXT: [[TMP136:%.*]] = shufflevector <4 x i8> [[TMP135]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP137:%.*]] = zext <2 x i8> [[TMP136]] to <2 x i32> +; CHECK-NEXT: [[TMP138:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1 +; CHECK-NEXT: [[TMP139:%.*]] = shufflevector <4 x i8> [[TMP138]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP140:%.*]] = zext <2 x i8> [[TMP139]] to <2 x i32> +; CHECK-NEXT: [[TMP141:%.*]] = sub <2 x i32> [[TMP137]], [[TMP140]] +; CHECK-NEXT: [[TMP142:%.*]] = shl <2 x i32> [[TMP141]], splat (i32 16) +; CHECK-NEXT: [[TMP143:%.*]] = shufflevector <4 x i8> [[TMP132]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP144:%.*]] = zext <2 x i8> [[TMP143]] to <2 x i32> +; CHECK-NEXT: [[TMP145:%.*]] = shufflevector <4 x i8> [[TMP135]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP146:%.*]] = zext <2 x i8> [[TMP145]] to <2 x i32> +; CHECK-NEXT: [[TMP147:%.*]] = shufflevector <4 x i8> [[TMP138]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP148:%.*]] = zext <2 x i8> [[TMP147]] to <2 x i32> +; CHECK-NEXT: [[TMP149:%.*]] = sub <2 x i32> [[TMP146]], [[TMP148]] +; CHECK-NEXT: [[TMP150:%.*]] = shl <2 x i32> [[TMP149]], splat (i32 16) +; CHECK-NEXT: [[TMP151:%.*]] = insertelement <2 x i32> [[TMP131]], i32 [[CONV33_1]], i32 1 +; CHECK-NEXT: [[TMP225:%.*]] = sub <2 x i32> [[TMP151]], [[TMP144]] +; CHECK-NEXT: [[TMP153:%.*]] = add <2 x i32> [[TMP150]], [[TMP225]] +; CHECK-NEXT: [[TMP154:%.*]] = insertelement <2 x i32> [[TMP131]], i32 [[CONV_1]], i32 0 +; CHECK-NEXT: [[TMP155:%.*]] = sub <2 x i32> [[TMP154]], [[TMP134]] +; CHECK-NEXT: [[TMP156:%.*]] = add <2 x i32> [[TMP142]], [[TMP155]] +; CHECK-NEXT: [[TMP157:%.*]] = add <2 x i32> [[TMP153]], [[TMP156]] +; CHECK-NEXT: [[TMP158:%.*]] = sub <2 x i32> [[TMP156]], [[TMP153]] +; CHECK-NEXT: [[TMP159:%.*]] = extractelement <2 x i32> [[TMP157]], i32 0 +; CHECK-NEXT: [[TMP160:%.*]] = extractelement <2 x i32> [[TMP157]], i32 1 +; CHECK-NEXT: [[ADD48_1:%.*]] = add i32 [[TMP160]], [[TMP159]] +; CHECK-NEXT: [[SUB51_1:%.*]] = sub i32 [[TMP159]], [[TMP160]] +; CHECK-NEXT: [[TMP161:%.*]] = extractelement <2 x i32> [[TMP158]], i32 0 +; CHECK-NEXT: [[TMP162:%.*]] = extractelement <2 x i32> [[TMP158]], i32 1 +; CHECK-NEXT: [[ADD55_1:%.*]] = add i32 [[TMP162]], [[TMP161]] +; CHECK-NEXT: [[SUB59_2:%.*]] = sub i32 [[TMP161]], [[TMP162]] +; CHECK-NEXT: [[SHR_I54:%.*]] = lshr i32 [[TMP160]], 15 ; CHECK-NEXT: [[AND_I55:%.*]] = and i32 [[SHR_I54]], 65537 ; CHECK-NEXT: [[MUL_I56:%.*]] = mul i32 [[AND_I55]], 65535 -; CHECK-NEXT: [[SHR_I54_1:%.*]] = lshr i32 [[SUB47_1]], 15 +; CHECK-NEXT: [[SHR_I54_1:%.*]] = lshr i32 [[TMP162]], 15 ; CHECK-NEXT: [[AND_I55_1:%.*]] = and i32 [[SHR_I54_1]], 65537 ; CHECK-NEXT: [[MUL_I56_1:%.*]] = mul i32 [[AND_I55_1]], 65535 -; CHECK-NEXT: [[TMP154:%.*]] = lshr <2 x i32> [[TMP110]], splat (i32 15) -; CHECK-NEXT: [[TMP184:%.*]] = and <2 x i32> [[TMP154]], splat (i32 65537) -; CHECK-NEXT: [[TMP195:%.*]] = mul <2 x i32> [[TMP184]], splat (i32 65535) -; CHECK-NEXT: [[ADD78:%.*]] = add i32 [[ADD48_1]], [[ADD55]] -; CHECK-NEXT: [[SUB86:%.*]] = sub i32 [[ADD55]], [[ADD48_1]] -; CHECK-NEXT: [[ADD103:%.*]] = add i32 [[ADD94]], [[ADD78]] -; CHECK-NEXT: [[SUB104:%.*]] = sub i32 [[ADD78]], [[ADD94]] -; CHECK-NEXT: [[ADD105:%.*]] = add i32 [[SUB102]], [[SUB86]] -; CHECK-NEXT: [[SUB106:%.*]] = sub i32 [[SUB86]], [[SUB102]] -; CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[MUL_I51_3]], [[ADD103]] +; CHECK-NEXT: [[TMP163:%.*]] = lshr <2 x i32> [[TMP131]], splat (i32 15) +; CHECK-NEXT: [[TMP164:%.*]] = and <2 x i32> [[TMP163]], splat (i32 65537) +; CHECK-NEXT: [[TMP165:%.*]] = mul <2 x i32> [[TMP164]], splat (i32 65535) +; CHECK-NEXT: [[ADD78:%.*]] = add i32 [[ADD48_1]], [[ADD48]] +; CHECK-NEXT: [[SUB86:%.*]] = sub i32 [[ADD48]], [[ADD48_1]] +; CHECK-NEXT: [[ADD103:%.*]] = add i32 [[ADD95]], [[ADD78]] +; CHECK-NEXT: [[SUB104:%.*]] = sub i32 [[ADD78]], [[ADD95]] +; CHECK-NEXT: [[ADD105:%.*]] = add i32 [[SUB86_3]], [[SUB86]] +; CHECK-NEXT: [[SUB106:%.*]] = sub i32 [[SUB86]], [[SUB86_3]] +; CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[MUL_I]], [[ADD103]] ; CHECK-NEXT: [[XOR_I:%.*]] = xor i32 [[ADD_I]], [[TMP77]] -; CHECK-NEXT: [[ADD_I52:%.*]] = add i32 [[MUL_I_1]], [[ADD105]] -; CHECK-NEXT: [[XOR_I53:%.*]] = xor i32 [[ADD_I52]], [[TMP34]] +; CHECK-NEXT: [[ADD_I52:%.*]] = add i32 [[MUL_I51]], [[ADD105]] +; CHECK-NEXT: [[XOR_I53:%.*]] = xor i32 [[ADD_I52]], [[TMP45]] ; CHECK-NEXT: [[ADD_I57:%.*]] = add i32 [[MUL_I56]], [[SUB104]] -; CHECK-NEXT: [[XOR_I58:%.*]] = xor i32 [[ADD_I57]], [[TMP142]] -; CHECK-NEXT: [[ADD_I62:%.*]] = add i32 [[MUL_I61_1]], [[SUB106]] -; CHECK-NEXT: [[XOR_I63:%.*]] = xor i32 [[ADD_I62]], [[TMP99]] +; CHECK-NEXT: [[XOR_I58:%.*]] = xor i32 [[ADD_I57]], [[TMP160]] +; CHECK-NEXT: [[ADD_I62:%.*]] = add i32 [[MUL_I61]], [[SUB106]] +; CHECK-NEXT: [[XOR_I63:%.*]] = xor i32 [[ADD_I62]], [[TMP127]] ; CHECK-NEXT: [[ADD110:%.*]] = add i32 [[XOR_I53]], [[XOR_I]] ; CHECK-NEXT: [[ADD112:%.*]] = add i32 [[ADD110]], [[XOR_I58]] -; CHECK-NEXT: [[ADD113:%.*]] = add i32 [[ADD112]], [[XOR_I63]] +; CHECK-NEXT: [[ADD105_3:%.*]] = add i32 [[ADD112]], [[XOR_I63]] +; CHECK-NEXT: [[ADD78_1:%.*]] = add i32 [[ADD55_1]], [[ADD55]] +; CHECK-NEXT: [[SUB86_1:%.*]] = sub i32 [[ADD55]], [[ADD55_1]] ; CHECK-NEXT: [[ADD103_1:%.*]] = add i32 [[ADD94_1]], [[ADD78_1]] ; CHECK-NEXT: [[SUB104_1:%.*]] = sub i32 [[ADD78_1]], [[ADD94_1]] -; CHECK-NEXT: [[ADD103_2:%.*]] = add i32 [[ADD94_5]], [[ADD103_1]] -; CHECK-NEXT: [[SUB104_2:%.*]] = sub i32 [[ADD103_1]], [[ADD94_5]] -; CHECK-NEXT: [[ADD105_1:%.*]] = add i32 [[SUB102_1]], [[SUB104_1]] -; CHECK-NEXT: [[SUB106_1:%.*]] = sub i32 [[SUB104_1]], [[SUB102_1]] -; CHECK-NEXT: [[ADD_I_1:%.*]] = add i32 [[MUL_I_2]], [[ADD103_2]] -; CHECK-NEXT: [[XOR_I_1:%.*]] = xor i32 [[ADD_I_1]], [[TMP83]] -; CHECK-NEXT: [[ADD_I52_1:%.*]] = add i32 [[ADD94_2]], [[ADD105_1]] -; CHECK-NEXT: [[XOR_I53_1:%.*]] = xor i32 [[ADD_I52_1]], [[TMP84]] -; CHECK-NEXT: [[ADD_I57_1:%.*]] = add i32 [[MUL_I56_1]], [[SUB104_2]] -; CHECK-NEXT: [[XOR_I58_1:%.*]] = xor i32 [[ADD_I57_1]], [[SUB47_1]] -; CHECK-NEXT: [[ADD_I62_1:%.*]] = add i32 [[MUL_I61_4]], [[SUB106_1]] -; CHECK-NEXT: [[XOR_I63_1:%.*]] = xor i32 [[ADD_I62_1]], [[TMP157]] -; CHECK-NEXT: [[ADD108_1:%.*]] = add i32 [[XOR_I53_1]], [[ADD113]] +; CHECK-NEXT: [[ADD105_1:%.*]] = add i32 [[SUB102_1]], [[SUB86_1]] +; CHECK-NEXT: [[SUB106_1:%.*]] = sub i32 [[SUB86_1]], [[SUB102_1]] +; CHECK-NEXT: [[ADD_I_1:%.*]] = add i32 [[MUL_I_1]], [[ADD103_1]] +; CHECK-NEXT: [[XOR_I_1:%.*]] = xor i32 [[ADD_I_1]], [[CONV9_2]] +; CHECK-NEXT: [[ADD_I52_1:%.*]] = add i32 [[MUL_I51_1]], [[ADD105_1]] +; CHECK-NEXT: [[XOR_I53_1:%.*]] = xor i32 [[ADD_I52_1]], [[CONV_2]] +; CHECK-NEXT: [[ADD_I57_1:%.*]] = add i32 [[MUL_I56_1]], [[SUB104_1]] +; CHECK-NEXT: [[XOR_I58_1:%.*]] = xor i32 [[ADD_I57_1]], [[TMP162]] +; CHECK-NEXT: [[ADD_I62_1:%.*]] = add i32 [[MUL_I61_1]], [[SUB106_1]] +; CHECK-NEXT: [[XOR_I63_1:%.*]] = xor i32 [[ADD_I62_1]], [[TMP129]] +; CHECK-NEXT: [[ADD108_1:%.*]] = add i32 [[XOR_I53_1]], [[ADD105_3]] ; CHECK-NEXT: [[ADD110_1:%.*]] = add i32 [[ADD108_1]], [[XOR_I_1]] -; CHECK-NEXT: [[ADD112_1:%.*]] = add i32 [[ADD110_1]], [[XOR_I58_1]] -; CHECK-NEXT: [[ADD113_1:%.*]] = add i32 [[ADD112_1]], [[XOR_I63_1]] -; CHECK-NEXT: [[ADD78_2:%.*]] = add i32 [[SUB45_1]], [[SUB51]] -; CHECK-NEXT: [[TMP170:%.*]] = sub i32 [[SUB51]], [[SUB45_1]] -; CHECK-NEXT: [[TMP162:%.*]] = insertelement <2 x i32> poison, i32 [[ADD78_2]], i32 0 -; CHECK-NEXT: [[TMP163:%.*]] = shufflevector <2 x i32> [[TMP162]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP164:%.*]] = insertelement <2 x i32> poison, i32 [[ADD94_4]], i32 0 -; CHECK-NEXT: [[TMP165:%.*]] = shufflevector <2 x i32> [[TMP164]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP166:%.*]] = add <2 x i32> [[TMP163]], [[TMP165]] -; CHECK-NEXT: [[TMP167:%.*]] = sub <2 x i32> [[TMP163]], [[TMP165]] -; CHECK-NEXT: [[TMP168:%.*]] = shufflevector <2 x i32> [[TMP166]], <2 x i32> [[TMP167]], <2 x i32> -; CHECK-NEXT: [[ADD105_2:%.*]] = add i32 [[TMP169]], [[TMP170]] -; CHECK-NEXT: [[SUB106_2:%.*]] = sub i32 [[TMP170]], [[TMP169]] -; CHECK-NEXT: [[ADD_I52_2:%.*]] = add i32 [[MUL_I51_2]], [[ADD105_2]] -; CHECK-NEXT: [[XOR_I53_2:%.*]] = xor i32 [[ADD_I52_2]], [[CONV_1]] -; CHECK-NEXT: [[TMP197:%.*]] = add <2 x i32> [[TMP195]], [[TMP168]] -; CHECK-NEXT: [[TMP152:%.*]] = xor <2 x i32> [[TMP197]], [[TMP110]] -; CHECK-NEXT: [[SHR_I59_2:%.*]] = lshr i32 [[TMP111]], 15 -; CHECK-NEXT: [[AND_I60_2:%.*]] = and i32 [[SHR_I59_2]], 65537 -; CHECK-NEXT: [[MUL_I61_2:%.*]] = mul i32 [[AND_I60_2]], 65535 -; CHECK-NEXT: [[ADD_I62_2:%.*]] = add i32 [[MUL_I61_2]], [[SUB106_2]] -; CHECK-NEXT: [[XOR_I63_2:%.*]] = xor i32 [[ADD_I62_2]], [[TMP111]] -; CHECK-NEXT: [[ADD108_2:%.*]] = add i32 [[XOR_I53_2]], [[ADD113_1]] -; CHECK-NEXT: [[TMP175:%.*]] = extractelement <2 x i32> [[TMP152]], i32 0 -; CHECK-NEXT: [[ADD110_2:%.*]] = add i32 [[ADD108_2]], [[TMP175]] -; CHECK-NEXT: [[TMP176:%.*]] = extractelement <2 x i32> [[TMP152]], i32 1 -; CHECK-NEXT: [[ADD112_2:%.*]] = add i32 [[ADD110_2]], [[TMP176]] -; CHECK-NEXT: [[ADD113_2:%.*]] = add i32 [[ADD112_2]], [[XOR_I63_2]] -; CHECK-NEXT: [[ADD78_3:%.*]] = add i32 [[SUB59_1]], [[SUB59]] -; CHECK-NEXT: [[SUB86_3:%.*]] = sub i32 [[SUB59]], [[SUB59_1]] +; CHECK-NEXT: [[ADD112_5:%.*]] = add i32 [[ADD110_1]], [[XOR_I58_1]] +; CHECK-NEXT: [[ADD113_2:%.*]] = add i32 [[ADD112_5]], [[XOR_I63_1]] +; CHECK-NEXT: [[ADD78_3:%.*]] = add i32 [[SUB51_1]], [[TMP166]] +; CHECK-NEXT: [[TMP204:%.*]] = sub i32 [[TMP166]], [[SUB51_1]] ; CHECK-NEXT: [[TMP177:%.*]] = insertelement <2 x i32> poison, i32 [[ADD78_3]], i32 0 ; CHECK-NEXT: [[TMP178:%.*]] = shufflevector <2 x i32> [[TMP177]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP179:%.*]] = insertelement <2 x i32> poison, i32 [[ADD94_3]], i32 0 ; CHECK-NEXT: [[TMP180:%.*]] = shufflevector <2 x i32> [[TMP179]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP181:%.*]] = add <2 x i32> [[TMP178]], [[TMP180]] -; CHECK-NEXT: [[TMP182:%.*]] = sub <2 x i32> [[TMP178]], [[TMP180]] -; CHECK-NEXT: [[TMP183:%.*]] = shufflevector <2 x i32> [[TMP181]], <2 x i32> [[TMP182]], <2 x i32> -; CHECK-NEXT: [[ADD105_3:%.*]] = add i32 [[SUB102_3]], [[SUB86_3]] -; CHECK-NEXT: [[SUB106_3:%.*]] = sub i32 [[SUB86_3]], [[SUB102_3]] -; CHECK-NEXT: [[ADD_I52_3:%.*]] = add i32 [[MUL_I51_4]], [[ADD105_3]] -; CHECK-NEXT: [[XOR_I53_3:%.*]] = xor i32 [[ADD_I52_3]], [[CONV1]] +; CHECK-NEXT: [[TMP199:%.*]] = add <2 x i32> [[TMP178]], [[TMP180]] +; CHECK-NEXT: [[TMP200:%.*]] = sub <2 x i32> [[TMP178]], [[TMP180]] +; CHECK-NEXT: [[TMP201:%.*]] = shufflevector <2 x i32> [[TMP199]], <2 x i32> [[TMP200]], <2 x i32> +; CHECK-NEXT: [[ADD113_1:%.*]] = add i32 [[ADD112_1]], [[TMP204]] +; CHECK-NEXT: [[SUB106_2:%.*]] = sub i32 [[TMP204]], [[ADD112_1]] +; CHECK-NEXT: [[ADD_I52_2:%.*]] = add i32 [[MUL_I51_2]], [[ADD113_1]] +; CHECK-NEXT: [[XOR_I53_2:%.*]] = xor i32 [[ADD_I52_2]], [[CONV_1]] +; CHECK-NEXT: [[TMP208:%.*]] = add <2 x i32> [[TMP165]], [[TMP201]] +; CHECK-NEXT: [[TMP209:%.*]] = xor <2 x i32> [[TMP208]], [[TMP131]] +; CHECK-NEXT: [[SHR_I59_2:%.*]] = lshr i32 [[TMP120]], 15 +; CHECK-NEXT: [[AND_I60_2:%.*]] = and i32 [[SHR_I59_2]], 65537 +; CHECK-NEXT: [[MUL_I61_2:%.*]] = mul i32 [[AND_I60_2]], 65535 +; CHECK-NEXT: [[ADD_I62_2:%.*]] = add i32 [[MUL_I61_2]], [[SUB106_2]] +; CHECK-NEXT: [[XOR_I63_4:%.*]] = xor i32 [[ADD_I62_2]], [[TMP120]] +; CHECK-NEXT: [[ADD108_2:%.*]] = add i32 [[XOR_I53_2]], [[ADD113_2]] +; CHECK-NEXT: [[TMP211:%.*]] = extractelement <2 x i32> [[TMP209]], i32 0 +; CHECK-NEXT: [[ADD110_2:%.*]] = add i32 [[ADD108_2]], [[TMP211]] +; CHECK-NEXT: [[TMP212:%.*]] = extractelement <2 x i32> [[TMP209]], i32 1 +; CHECK-NEXT: [[ADD112_4:%.*]] = add i32 [[ADD110_2]], [[TMP212]] +; CHECK-NEXT: [[ADD113_4:%.*]] = add i32 [[ADD112_4]], [[XOR_I63_4]] +; CHECK-NEXT: [[ADD78_4:%.*]] = add i32 [[SUB59_2]], [[SUB60]] +; CHECK-NEXT: [[SUB86_4:%.*]] = sub i32 [[SUB60]], [[SUB59_2]] +; CHECK-NEXT: [[TMP213:%.*]] = insertelement <2 x i32> poison, i32 [[ADD78_4]], i32 0 +; CHECK-NEXT: [[TMP214:%.*]] = shufflevector <2 x i32> [[TMP213]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP215:%.*]] = insertelement <2 x i32> poison, i32 [[ADD94_4]], i32 0 +; CHECK-NEXT: [[TMP216:%.*]] = shufflevector <2 x i32> [[TMP215]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP217:%.*]] = add <2 x i32> [[TMP214]], [[TMP216]] +; CHECK-NEXT: [[TMP218:%.*]] = sub <2 x i32> [[TMP214]], [[TMP216]] +; CHECK-NEXT: [[TMP219:%.*]] = shufflevector <2 x i32> [[TMP217]], <2 x i32> [[TMP218]], <2 x i32> +; CHECK-NEXT: [[ADD105_4:%.*]] = add i32 [[SUB102_3]], [[SUB86_4]] +; CHECK-NEXT: [[SUB106_3:%.*]] = sub i32 [[SUB86_4]], [[SUB102_3]] +; CHECK-NEXT: [[ADD_I52_4:%.*]] = add i32 [[MUL_I51_3]], [[ADD105_4]] +; CHECK-NEXT: [[XOR_I53_3:%.*]] = xor i32 [[ADD_I52_4]], [[CONV1]] ; CHECK-NEXT: [[TMP185:%.*]] = lshr <2 x i32> [[TMP102]], splat (i32 15) ; CHECK-NEXT: [[TMP193:%.*]] = and <2 x i32> [[TMP185]], splat (i32 65537) ; CHECK-NEXT: [[TMP186:%.*]] = mul <2 x i32> [[TMP193]], splat (i32 65535) -; CHECK-NEXT: [[TMP187:%.*]] = add <2 x i32> [[TMP186]], [[TMP183]] +; CHECK-NEXT: [[TMP187:%.*]] = add <2 x i32> [[TMP186]], [[TMP219]] ; CHECK-NEXT: [[TMP188:%.*]] = xor <2 x i32> [[TMP187]], [[TMP102]] ; CHECK-NEXT: [[SHR_I59_3:%.*]] = lshr i32 [[CONV33]], 15 ; CHECK-NEXT: [[AND_I60_3:%.*]] = and i32 [[SHR_I59_3]], 65537 ; CHECK-NEXT: [[MUL_I61_3:%.*]] = mul i32 [[AND_I60_3]], 65535 ; CHECK-NEXT: [[ADD_I62_3:%.*]] = add i32 [[MUL_I61_3]], [[SUB106_3]] ; CHECK-NEXT: [[XOR_I63_3:%.*]] = xor i32 [[ADD_I62_3]], [[CONV33]] -; CHECK-NEXT: [[ADD108_3:%.*]] = add i32 [[XOR_I53_3]], [[ADD113_2]] +; CHECK-NEXT: [[ADD108_3:%.*]] = add i32 [[XOR_I53_3]], [[ADD113_4]] ; CHECK-NEXT: [[TMP189:%.*]] = extractelement <2 x i32> [[TMP188]], i32 0 ; CHECK-NEXT: [[ADD110_3:%.*]] = add i32 [[ADD108_3]], [[TMP189]] ; CHECK-NEXT: [[TMP190:%.*]] = extractelement <2 x i32> [[TMP188]], i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder.ll index 360b258f216c5..f875d45db61dd 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder.ll @@ -14,7 +14,7 @@ define void @test() { ; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr undef, align 4 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> , float [[TMP2]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> [[TMP0]], float [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> , <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> , float [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP6]]) ; CHECK-NEXT: br i1 false, label [[BB2:%.*]], label [[BB3:%.*]] ; CHECK: bb2: diff --git a/llvm/test/Transforms/SLPVectorizer/alternate-cmp-swapped-pred-parent.ll b/llvm/test/Transforms/SLPVectorizer/alternate-cmp-swapped-pred-parent.ll index 371b23019498d..afca39ad8938a 100644 --- a/llvm/test/Transforms/SLPVectorizer/alternate-cmp-swapped-pred-parent.ll +++ b/llvm/test/Transforms/SLPVectorizer/alternate-cmp-swapped-pred-parent.ll @@ -12,7 +12,8 @@ define void @test() { ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i16> , i16 [[CALL37]], i32 3 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[CALL]], i32 5 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> , <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i16> , i16 [[CALL37]], i32 6 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i16> [[TMP5]], i16 [[CALL]], i32 7 ; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <8 x i16> [[TMP2]], [[TMP3]] ; CHECK-NEXT: ret void ; @@ -43,7 +44,8 @@ define void @test1() { ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i16> , i16 [[CALL]], i32 3 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[CALL37]], i32 4 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> , <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i16> , i16 [[CALL]], i32 6 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i16> [[TMP5]], i16 [[CALL37]], i32 7 ; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <8 x i16> [[TMP2]], [[TMP3]] ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/extract-many-users-buildvector.ll b/llvm/test/Transforms/SLPVectorizer/extract-many-users-buildvector.ll index 261ec2b3935d7..40568f9c8a509 100644 --- a/llvm/test/Transforms/SLPVectorizer/extract-many-users-buildvector.ll +++ b/llvm/test/Transforms/SLPVectorizer/extract-many-users-buildvector.ll @@ -1,31 +1,56 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 -; RUN: %if x86-registered-target %{ opt -S -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s %} -; RUN: %if aarch64-registered-target %{ opt -S -passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s %} +; RUN: %if x86-registered-target %{ opt -S -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefix X86 %} +; RUN: %if aarch64-registered-target %{ opt -S -passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefix AARCH64 %} define i1 @test(float %0, double %1) { -; CHECK-LABEL: define i1 @test -; CHECK-SAME: (float [[TMP0:%.*]], double [[TMP1:%.*]]) { -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> , float [[TMP0]], i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = fpext <4 x float> [[TMP3]] to <4 x double> -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> , double [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> zeroinitializer, [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP6]], <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> , <4 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> , <4 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = fmul <4 x double> [[TMP8]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = fmul <4 x double> zeroinitializer, [[TMP4]] -; CHECK-NEXT: [[TMP12:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v4f64(<8 x double> , <4 x double> [[TMP10]], i64 0) -; CHECK-NEXT: [[TMP13:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v4f64(<8 x double> , <4 x double> [[TMP11]], i64 0) -; CHECK-NEXT: [[TMP14:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v2f64(<8 x double> [[TMP13]], <2 x double> [[TMP6]], i64 4) -; CHECK-NEXT: [[TMP15:%.*]] = fsub <8 x double> [[TMP12]], [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = fmul <8 x double> [[TMP12]], [[TMP14]] -; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <8 x double> [[TMP15]], <8 x double> [[TMP16]], <8 x i32> -; CHECK-NEXT: [[TMP18:%.*]] = fptrunc <8 x double> [[TMP17]] to <8 x float> -; CHECK-NEXT: [[TMP19:%.*]] = fmul <8 x float> [[TMP18]], zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = fcmp oeq <8 x float> [[TMP19]], zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = freeze <8 x i1> [[TMP20]] -; CHECK-NEXT: [[TMP22:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP21]]) -; CHECK-NEXT: ret i1 [[TMP22]] +; X86-LABEL: define i1 @test +; X86-SAME: (float [[TMP0:%.*]], double [[TMP1:%.*]]) { +; X86-NEXT: [[TMP3:%.*]] = insertelement <4 x float> , float [[TMP0]], i32 3 +; X86-NEXT: [[TMP4:%.*]] = fpext <4 x float> [[TMP3]] to <4 x double> +; X86-NEXT: [[TMP5:%.*]] = insertelement <2 x double> , double [[TMP1]], i32 0 +; X86-NEXT: [[TMP6:%.*]] = fmul <2 x double> zeroinitializer, [[TMP5]] +; X86-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP6]], <4 x i32> +; X86-NEXT: [[TMP8:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> , <4 x i32> +; X86-NEXT: [[TMP9:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> , <4 x i32> +; X86-NEXT: [[TMP10:%.*]] = fmul <4 x double> [[TMP8]], [[TMP9]] +; X86-NEXT: [[TMP11:%.*]] = fmul <4 x double> zeroinitializer, [[TMP4]] +; X86-NEXT: [[TMP12:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v4f64(<8 x double> , <4 x double> [[TMP10]], i64 0) +; X86-NEXT: [[TMP13:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v4f64(<8 x double> , <4 x double> [[TMP11]], i64 0) +; X86-NEXT: [[TMP14:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v2f64(<8 x double> [[TMP13]], <2 x double> [[TMP6]], i64 4) +; X86-NEXT: [[TMP15:%.*]] = fsub <8 x double> [[TMP12]], [[TMP14]] +; X86-NEXT: [[TMP16:%.*]] = fmul <8 x double> [[TMP12]], [[TMP14]] +; X86-NEXT: [[TMP17:%.*]] = shufflevector <8 x double> [[TMP15]], <8 x double> [[TMP16]], <8 x i32> +; X86-NEXT: [[TMP18:%.*]] = fptrunc <8 x double> [[TMP17]] to <8 x float> +; X86-NEXT: [[TMP19:%.*]] = fmul <8 x float> [[TMP18]], zeroinitializer +; X86-NEXT: [[TMP20:%.*]] = fcmp oeq <8 x float> [[TMP19]], zeroinitializer +; X86-NEXT: [[TMP21:%.*]] = freeze <8 x i1> [[TMP20]] +; X86-NEXT: [[TMP22:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP21]]) +; X86-NEXT: ret i1 [[TMP22]] +; +; AARCH64-LABEL: define i1 @test +; AARCH64-SAME: (float [[TMP0:%.*]], double [[TMP1:%.*]]) { +; AARCH64-NEXT: [[TMP3:%.*]] = insertelement <4 x float> , float [[TMP0]], i32 3 +; AARCH64-NEXT: [[TMP4:%.*]] = fpext <4 x float> [[TMP3]] to <4 x double> +; AARCH64-NEXT: [[TMP5:%.*]] = insertelement <2 x double> , double [[TMP1]], i32 0 +; AARCH64-NEXT: [[TMP6:%.*]] = fmul <2 x double> zeroinitializer, [[TMP5]] +; AARCH64-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP6]], <4 x i32> +; AARCH64-NEXT: [[TMP8:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> , <4 x i32> +; AARCH64-NEXT: [[TMP9:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> , <4 x i32> +; AARCH64-NEXT: [[TMP10:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> [[TMP4]], <4 x i32> +; AARCH64-NEXT: [[TMP11:%.*]] = fmul <4 x double> [[TMP8]], [[TMP10]] +; AARCH64-NEXT: [[TMP12:%.*]] = fmul <4 x double> zeroinitializer, [[TMP4]] +; AARCH64-NEXT: [[TMP13:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v4f64(<8 x double> , <4 x double> [[TMP11]], i64 0) +; AARCH64-NEXT: [[TMP14:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v4f64(<8 x double> , <4 x double> [[TMP12]], i64 0) +; AARCH64-NEXT: [[TMP15:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v2f64(<8 x double> [[TMP14]], <2 x double> [[TMP6]], i64 4) +; AARCH64-NEXT: [[TMP16:%.*]] = fsub <8 x double> [[TMP13]], [[TMP15]] +; AARCH64-NEXT: [[TMP17:%.*]] = fmul <8 x double> [[TMP13]], [[TMP15]] +; AARCH64-NEXT: [[TMP18:%.*]] = shufflevector <8 x double> [[TMP16]], <8 x double> [[TMP17]], <8 x i32> +; AARCH64-NEXT: [[TMP19:%.*]] = fptrunc <8 x double> [[TMP18]] to <8 x float> +; AARCH64-NEXT: [[TMP20:%.*]] = fmul <8 x float> [[TMP19]], zeroinitializer +; AARCH64-NEXT: [[TMP21:%.*]] = fcmp oeq <8 x float> [[TMP20]], zeroinitializer +; AARCH64-NEXT: [[TMP22:%.*]] = freeze <8 x i1> [[TMP21]] +; AARCH64-NEXT: [[TMP23:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP22]]) +; AARCH64-NEXT: ret i1 [[TMP23]] ; %3 = fpext float %0 to double %4 = fpext float 0.000000e+00 to double diff --git a/llvm/test/Transforms/SLPVectorizer/full-overlap-non-schedulable.ll b/llvm/test/Transforms/SLPVectorizer/full-overlap-non-schedulable.ll index dbd91199c24ec..c704baaad6f71 100644 --- a/llvm/test/Transforms/SLPVectorizer/full-overlap-non-schedulable.ll +++ b/llvm/test/Transforms/SLPVectorizer/full-overlap-non-schedulable.ll @@ -24,7 +24,9 @@ define void @test(ptr %p1, ptr %0, i32 %1, i1 %c1, ptr %p2) { ; CHECK: [[L47]]: ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x ptr> [[TMP5]], i32 1 ; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x ptr> [[TMP5]], <4 x ptr> poison, <2 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <4 x ptr> [[TMP5]], <4 x ptr> poison, <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x ptr> [[TMP25]], <2 x ptr> [[TMP26]], <2 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <2 x ptr> [[TMP14]], zeroinitializer ; CHECK-NEXT: [[TMP16:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4 ; CHECK-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> zeroinitializer, <2 x i32> [[TMP16]] diff --git a/llvm/test/Transforms/SLPVectorizer/gathered-consecutive-loads-different-types.ll b/llvm/test/Transforms/SLPVectorizer/gathered-consecutive-loads-different-types.ll index a854c61db6d28..a42c8f2c650ae 100644 --- a/llvm/test/Transforms/SLPVectorizer/gathered-consecutive-loads-different-types.ll +++ b/llvm/test/Transforms/SLPVectorizer/gathered-consecutive-loads-different-types.ll @@ -11,8 +11,8 @@ define i32 @test(i8 %0) { ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i8> zeroinitializer, [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = load volatile i8, ptr null, align 8 ; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i8>, ptr addrspace(21) getelementptr inbounds (i8, ptr addrspace(21) null, i64 8), align 8 -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i8> [[TMP5]], <2 x i8> poison, <8 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i8> [[TMP6]], <8 x i8> , <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i8> [[TMP5]], <2 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i8> , <8 x i8> [[TMP6]], <8 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <8 x i8> zeroinitializer, [[TMP7]] ; CHECK-NEXT: [[TEST_STRUCTCOPY_14_S14_CM_COERCE_SROA_2_0_COPYLOAD:%.*]] = load i48, ptr addrspace(21) getelementptr inbounds (i8, ptr addrspace(21) null, i64 8), align 8 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i48> , i48 [[TEST_STRUCTCOPY_14_S14_CM_COERCE_SROA_2_0_COPYLOAD]], i32 0 @@ -21,9 +21,9 @@ define i32 @test(i8 %0) { ; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr addrspace(21) null, align 2 ; CHECK-NEXT: [[TMP13:%.*]] = load volatile i8, ptr null, align 2 ; CHECK-NEXT: [[TMP14:%.*]] = load <2 x i8>, ptr addrspace(21) getelementptr inbounds (i8, ptr addrspace(21) null, i64 8), align 8 -; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x i8> [[TMP14]], <2 x i8> poison, <8 x i32> -; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <8 x i8> [[TMP15]], <8 x i8> , <8 x i32> -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x i8> [[TMP16]], i8 [[TMP12]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x i8> , i8 [[TMP12]], i32 1 +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x i8> [[TMP14]], <2 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <8 x i8> [[TMP15]], <8 x i8> [[TMP16]], <8 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = insertelement <8 x i8> , i8 [[TMP0]], i32 3 ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x i8> [[TMP18]], i8 [[TMP13]], i32 1 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq <8 x i8> [[TMP17]], [[TMP19]] diff --git a/llvm/test/Transforms/SLPVectorizer/reorder-clustered-node.ll b/llvm/test/Transforms/SLPVectorizer/reorder-clustered-node.ll index 561182d5e4f49..940ee5b95871d 100644 --- a/llvm/test/Transforms/SLPVectorizer/reorder-clustered-node.ll +++ b/llvm/test/Transforms/SLPVectorizer/reorder-clustered-node.ll @@ -1,30 +1,54 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: %if x86-registered-target %{ opt -passes=slp-vectorizer -S < %s -mtriple=x86_64 -slp-threshold=-150 | FileCheck %s %} -; RUN: %if aarch64-registered-target %{ opt -passes=slp-vectorizer -S < %s -mtriple=aarch64-unknown-linux-gnu -slp-threshold=-150 | FileCheck %s %} +; RUN: %if x86-registered-target %{ opt -passes=slp-vectorizer -S < %s -mtriple=x86_64 -slp-threshold=-150 | FileCheck %s --check-prefix X86 %} +; RUN: %if aarch64-registered-target %{ opt -passes=slp-vectorizer -S < %s -mtriple=aarch64-unknown-linux-gnu -slp-threshold=-150 | FileCheck %s --check-prefix AARCH64 %} define i1 @test(ptr %arg, ptr %i233, i64 %i241, ptr %i235, ptr %i237, ptr %i227) { -; CHECK-LABEL: @test( -; CHECK-NEXT: bb: -; CHECK-NEXT: [[I226:%.*]] = getelementptr ptr, ptr [[ARG:%.*]], i32 7 -; CHECK-NEXT: [[I242:%.*]] = getelementptr double, ptr [[I233:%.*]], i64 [[I241:%.*]] -; CHECK-NEXT: [[I245:%.*]] = getelementptr double, ptr [[I235:%.*]], i64 [[I241]] -; CHECK-NEXT: [[I248:%.*]] = getelementptr double, ptr [[I237:%.*]], i64 [[I241]] -; CHECK-NEXT: [[I250:%.*]] = getelementptr double, ptr [[I227:%.*]], i64 [[I241]] -; CHECK-NEXT: [[TMP0:%.*]] = load <4 x ptr>, ptr [[I226]], align 8 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x ptr> [[TMP0]], <4 x ptr> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x ptr> , ptr [[I242]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> [[TMP2]], ptr [[I250]], i32 2 -; CHECK-NEXT: [[TMP4:%.*]] = icmp ult <8 x ptr> [[TMP3]], [[TMP1]] -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x ptr> [[TMP5]], ptr [[I245]], i32 2 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x ptr> [[TMP6]], ptr [[I248]], i32 3 -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x ptr> [[TMP7]], <4 x ptr> poison, <8 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> , <8 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = icmp ult <8 x ptr> [[TMP8]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i1> [[TMP4]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP11]]) -; CHECK-NEXT: [[OP_RDX:%.*]] = and i1 [[TMP12]], false -; CHECK-NEXT: ret i1 [[OP_RDX]] +; X86-LABEL: @test( +; X86-NEXT: bb: +; X86-NEXT: [[I226:%.*]] = getelementptr ptr, ptr [[ARG:%.*]], i32 7 +; X86-NEXT: [[I242:%.*]] = getelementptr double, ptr [[I233:%.*]], i64 [[I241:%.*]] +; X86-NEXT: [[I245:%.*]] = getelementptr double, ptr [[I235:%.*]], i64 [[I241]] +; X86-NEXT: [[I248:%.*]] = getelementptr double, ptr [[I237:%.*]], i64 [[I241]] +; X86-NEXT: [[I250:%.*]] = getelementptr double, ptr [[I227:%.*]], i64 [[I241]] +; X86-NEXT: [[TMP0:%.*]] = load <4 x ptr>, ptr [[I226]], align 8 +; X86-NEXT: [[TMP1:%.*]] = shufflevector <4 x ptr> [[TMP0]], <4 x ptr> poison, <8 x i32> +; X86-NEXT: [[TMP2:%.*]] = insertelement <8 x ptr> , ptr [[I242]], i32 0 +; X86-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> [[TMP2]], ptr [[I250]], i32 2 +; X86-NEXT: [[TMP4:%.*]] = icmp ult <8 x ptr> [[TMP3]], [[TMP1]] +; X86-NEXT: [[TMP5:%.*]] = insertelement <8 x ptr> poison, ptr [[I250]], i32 0 +; X86-NEXT: [[TMP6:%.*]] = insertelement <8 x ptr> [[TMP5]], ptr [[I242]], i32 1 +; X86-NEXT: [[TMP7:%.*]] = insertelement <8 x ptr> [[TMP6]], ptr [[I245]], i32 2 +; X86-NEXT: [[TMP8:%.*]] = insertelement <8 x ptr> [[TMP7]], ptr [[I248]], i32 3 +; X86-NEXT: [[TMP9:%.*]] = shufflevector <8 x ptr> [[TMP8]], <8 x ptr> poison, <8 x i32> +; X86-NEXT: [[TMP10:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> , <8 x i32> +; X86-NEXT: [[TMP11:%.*]] = icmp ult <8 x ptr> [[TMP9]], [[TMP10]] +; X86-NEXT: [[TMP12:%.*]] = or <8 x i1> [[TMP4]], [[TMP11]] +; X86-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP12]]) +; X86-NEXT: [[OP_RDX:%.*]] = and i1 [[TMP13]], false +; X86-NEXT: ret i1 [[OP_RDX]] +; +; AARCH64-LABEL: @test( +; AARCH64-NEXT: bb: +; AARCH64-NEXT: [[I226:%.*]] = getelementptr ptr, ptr [[ARG:%.*]], i32 7 +; AARCH64-NEXT: [[I242:%.*]] = getelementptr double, ptr [[I233:%.*]], i64 [[I241:%.*]] +; AARCH64-NEXT: [[I245:%.*]] = getelementptr double, ptr [[I235:%.*]], i64 [[I241]] +; AARCH64-NEXT: [[I248:%.*]] = getelementptr double, ptr [[I237:%.*]], i64 [[I241]] +; AARCH64-NEXT: [[I250:%.*]] = getelementptr double, ptr [[I227:%.*]], i64 [[I241]] +; AARCH64-NEXT: [[TMP0:%.*]] = load <4 x ptr>, ptr [[I226]], align 8 +; AARCH64-NEXT: [[TMP1:%.*]] = shufflevector <4 x ptr> [[TMP0]], <4 x ptr> poison, <8 x i32> +; AARCH64-NEXT: [[TMP2:%.*]] = insertelement <8 x ptr> , ptr [[I242]], i32 0 +; AARCH64-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> [[TMP2]], ptr [[I250]], i32 2 +; AARCH64-NEXT: [[TMP4:%.*]] = icmp ult <8 x ptr> [[TMP3]], [[TMP1]] +; AARCH64-NEXT: [[TMP5:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <4 x i32> +; AARCH64-NEXT: [[TMP6:%.*]] = insertelement <4 x ptr> [[TMP5]], ptr [[I245]], i32 2 +; AARCH64-NEXT: [[TMP7:%.*]] = insertelement <4 x ptr> [[TMP6]], ptr [[I248]], i32 3 +; AARCH64-NEXT: [[TMP8:%.*]] = shufflevector <4 x ptr> [[TMP7]], <4 x ptr> poison, <8 x i32> +; AARCH64-NEXT: [[TMP9:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> , <8 x i32> +; AARCH64-NEXT: [[TMP10:%.*]] = icmp ult <8 x ptr> [[TMP8]], [[TMP9]] +; AARCH64-NEXT: [[TMP11:%.*]] = or <8 x i1> [[TMP4]], [[TMP10]] +; AARCH64-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP11]]) +; AARCH64-NEXT: [[OP_RDX:%.*]] = and i1 [[TMP12]], false +; AARCH64-NEXT: ret i1 [[OP_RDX]] ; bb: %i226 = getelementptr ptr, ptr %arg, i32 7 diff --git a/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll b/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll index 61a84a67c9ff1..056b6222cae72 100644 --- a/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll +++ b/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll @@ -13,9 +13,9 @@ define void @func(i32 %0) { ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <32 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP6]] to i64 ; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[TMP9]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <32 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <32 x i32> [[TMP11]], <32 x i32> , <32 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <32 x i32> [[TMP12]], i32 0, i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP9]] to i32 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <32 x i32> , i32 [[TMP11]], i32 30 +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <32 x i32> [[TMP12]], <32 x i32> poison, <32 x i32> ; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x i32> [[TMP13]], <8 x i32> zeroinitializer, i64 16) ; CHECK-NEXT: [[TMP15:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP14]], <4 x i32> zeroinitializer, i64 24) ; CHECK-NEXT: [[TMP16:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v2i32(<32 x i32> [[TMP15]], <2 x i32> zeroinitializer, i64 14)