diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index ed3e45dd2c6c8..1f4bef08b81ce 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -246,15 +246,13 @@ class VPBuilder { new VPDerivedIVRecipe(Kind, FPBinOp, Start, Current, Step, Name)); } - VPScalarCastRecipe *createScalarCast(Instruction::CastOps Opcode, VPValue *Op, - Type *ResultTy, DebugLoc DL) { - return tryInsertInstruction( - new VPScalarCastRecipe(Opcode, Op, ResultTy, DL)); - } - - VPWidenCastRecipe *createWidenCast(Instruction::CastOps Opcode, VPValue *Op, - Type *ResultTy) { - return tryInsertInstruction(new VPWidenCastRecipe(Opcode, Op, ResultTy)); + VPInstructionWithType *createCast(Instruction::CastOps Opcode, VPValue *Op, + Type *ResultTy, DebugLoc DL = {}, + const Twine &Name = "", + Instruction *CI = nullptr) { + auto *VPI = new VPInstructionWithType(Opcode, {Op}, ResultTy, DL, Name); + VPI->setUnderlyingValue(CI); + return tryInsertInstruction(VPI); } VPScalarIVStepsRecipe * diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index cb860a472d8f7..6a5d4d3057664 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4434,8 +4434,7 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks( [](const auto *R) { return Instruction::Load; }) .Case( [](const auto *R) { return Instruction::Call; }) - .Case( + .Case( [](const auto *R) { return R->getOpcode(); }) .Case([](const VPInterleaveRecipe *R) { return R->getStoredValues().empty() ? Instruction::Load @@ -4496,15 +4495,11 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF, if (EphemeralRecipes.contains(&R)) continue; // Continue early if the recipe is considered to not produce a vector - // result. Note that this includes VPInstruction where some opcodes may - // produce a vector, to preserve existing behavior as VPInstructions model - // aspects not directly mapped to existing IR instructions. + // result. switch (R.getVPDefID()) { case VPDef::VPDerivedIVSC: case VPDef::VPScalarIVStepsSC: - case VPDef::VPScalarCastSC: case VPDef::VPReplicateSC: - case VPDef::VPInstructionSC: case VPDef::VPCanonicalIVPHISC: case VPDef::VPVectorPointerSC: case VPDef::VPReverseVectorPointerSC: @@ -4517,7 +4512,6 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF, case VPDef::VPActiveLaneMaskPHISC: case VPDef::VPWidenCallSC: case VPDef::VPWidenCanonicalIVSC: - case VPDef::VPWidenCastSC: case VPDef::VPWidenGEPSC: case VPDef::VPWidenIntrinsicSC: case VPDef::VPWidenSC: @@ -4534,6 +4528,15 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF, case VPDef::VPWidenStoreEVLSC: case VPDef::VPWidenStoreSC: break; + case VPDef::VPInstructionSC: { + // Note that for VPInstruction some opcodes may produce a vector. To + // preserve existing behavior only consider them vector-generating if + // they are casts with an underlying value. + if (Instruction::isCast(cast(&R)->getOpcode()) && + R.getVPSingleValue()->getUnderlyingValue()) + break; + continue; + } default: llvm_unreachable("unhandled recipe"); } @@ -8938,8 +8941,15 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe( } if (auto *CI = dyn_cast(Instr)) { - return new VPWidenCastRecipe(CI->getOpcode(), Operands[0], CI->getType(), - *CI); + auto *VPI = + isa(CI) + ? 
new VPInstructionWithType(CI->getOpcode(), {Operands[0]}, + CI->getType(), {CI->hasNonNeg()}, {}) + : new VPInstructionWithType(CI->getOpcode(), {Operands[0]}, + CI->getType(), {}); + + VPI->setUnderlyingValue(CI); + return VPI; } return tryToWiden(Instr, Operands); @@ -9061,9 +9071,9 @@ static VPInstruction *addResumePhiRecipeForInduction( // the widest induction) and thus may be wider than the induction here. Type *ScalarTypeOfWideIV = TypeInfo.inferScalarType(WideIV); if (ScalarTypeOfWideIV != TypeInfo.inferScalarType(EndValue)) { - EndValue = VectorPHBuilder.createScalarCast(Instruction::Trunc, EndValue, - ScalarTypeOfWideIV, - WideIV->getDebugLoc()); + EndValue = + VectorPHBuilder.createCast(Instruction::Trunc, EndValue, + ScalarTypeOfWideIV, WideIV->getDebugLoc()); } auto *ResumePhiRecipe = @@ -9861,12 +9871,12 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( RdxDesc.getRecurrenceKind())) { assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!"); Type *RdxTy = RdxDesc.getRecurrenceType(); - auto *Trunc = - new VPWidenCastRecipe(Instruction::Trunc, NewExitingVPV, RdxTy); + auto *Trunc = new VPInstructionWithType(Instruction::Trunc, NewExitingVPV, + RdxTy, {}); auto *Extnd = RdxDesc.isSigned() - ? new VPWidenCastRecipe(Instruction::SExt, Trunc, PhiTy) - : new VPWidenCastRecipe(Instruction::ZExt, Trunc, PhiTy); + ? new VPInstructionWithType(Instruction::SExt, Trunc, PhiTy, {}) + : new VPInstructionWithType(Instruction::ZExt, Trunc, PhiTy, {}); Trunc->insertAfter(NewExitingVPV->getDefiningRecipe()); Extnd->insertAfter(Trunc); @@ -10396,8 +10406,10 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L, assert(all_of(IV->users(), [](const VPUser *U) { return isa(U) || - isa(U) || isa(U) || + Instruction::isCast( + cast(U)->getOpcode()) || + cast(U)->getOpcode() == Instruction::Add; }) && diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index b1288c42b20f2..f47109156741a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -519,7 +519,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue { case VPRecipeBase::VPReverseVectorPointerSC: case VPRecipeBase::VPWidenCallSC: case VPRecipeBase::VPWidenCanonicalIVSC: - case VPRecipeBase::VPWidenCastSC: case VPRecipeBase::VPWidenGEPSC: case VPRecipeBase::VPWidenIntrinsicSC: case VPRecipeBase::VPWidenSC: @@ -533,7 +532,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue { case VPRecipeBase::VPWidenIntOrFpInductionSC: case VPRecipeBase::VPWidenPointerInductionSC: case VPRecipeBase::VPReductionPHISC: - case VPRecipeBase::VPScalarCastSC: case VPRecipeBase::VPScalarPHISC: case VPRecipeBase::VPPartialReductionSC: return true; @@ -599,13 +597,15 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe { DisjointFlagsTy(bool IsDisjoint) : IsDisjoint(IsDisjoint) {} }; + struct NonNegFlagsTy { + char NonNeg : 1; + NonNegFlagsTy(bool IsNonNeg = false) : NonNeg(IsNonNeg) {} + }; + private: struct ExactFlagsTy { char IsExact : 1; }; - struct NonNegFlagsTy { - char NonNeg : 1; - }; struct FastMathFlagsTy { char AllowReassoc : 1; char NoNaNs : 1; @@ -699,6 +699,12 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe { : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::DisjointOp), DisjointFlags(DisjointFlags) {} + template + VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, + NonNegFlagsTy NonNegFlags, DebugLoc DL = {}) + : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::NonNegOp), + 
NonNegFlags(NonNegFlags) {} + protected: template <typename IterT> VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, @@ -711,7 +717,6 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe { return R->getVPDefID() == VPRecipeBase::VPInstructionSC || R->getVPDefID() == VPRecipeBase::VPWidenSC || R->getVPDefID() == VPRecipeBase::VPWidenGEPSC || - R->getVPDefID() == VPRecipeBase::VPWidenCastSC || R->getVPDefID() == VPRecipeBase::VPWidenIntrinsicSC || R->getVPDefID() == VPRecipeBase::VPReplicateSC || R->getVPDefID() == VPRecipeBase::VPReverseVectorPointerSC || @@ -954,6 +959,12 @@ class VPInstruction : public VPRecipeWithIRFlags, VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands, FastMathFlags FMFs, DebugLoc DL = {}, const Twine &Name = ""); + VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, + NonNegFlagsTy NonNegFlags, DebugLoc DL = {}, + const Twine &Name = "") + : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, NonNegFlags, DL), + Opcode(Opcode), Name(Name.str()) {} + VP_CLASSOF_IMPL(VPDef::VPInstructionSC) VPInstruction *clone() override { @@ -1026,6 +1037,60 @@ class VPInstruction : public VPRecipeWithIRFlags, StringRef getName() const { return Name; } }; +/// A specialization of VPInstruction augmenting it with a dedicated result +/// type, to be used when the opcode and operands of the VPInstruction don't +/// directly determine the result type. +class VPInstructionWithType : public VPInstruction { + /// Scalar result type produced by the recipe. + Type *ResultTy; + + Value *generate(VPTransformState &State); + +public: + VPInstructionWithType(unsigned Opcode, ArrayRef<VPValue *> Operands, + Type *ResultTy, DebugLoc DL, const Twine &Name = "") + : VPInstruction(Opcode, Operands, DL, Name), ResultTy(ResultTy) {} + + VPInstructionWithType(unsigned Opcode, ArrayRef<VPValue *> Operands, + Type *ResultTy, NonNegFlagsTy Flags, DebugLoc DL, + const Twine &Name = "") + : VPInstruction(Opcode, Operands, Flags, DL, Name), ResultTy(ResultTy) {} + + static inline bool classof(const VPRecipeBase *R) { + auto *VPI = dyn_cast<VPInstruction>(R); + return VPI && Instruction::isCast(VPI->getOpcode()); + } + + static inline bool classof(const VPUser *R) { + return isa<VPInstructionWithType>(cast<VPRecipeBase>(R)); + } + + VPInstruction *clone() override { + auto *New = + new VPInstructionWithType(getOpcode(), {getOperand(0)}, getResultType(), + {}, getDebugLoc(), getName()); + New->setUnderlyingValue(getUnderlyingValue()); + New->transferFlags(*this); + return New; + } + + void execute(VPTransformState &State) override; + + /// Return the cost of this VPInstructionWithType. + InstructionCost computeCost(ElementCount VF, + VPCostContext &Ctx) const override; + + Type *getResultType() const { return ResultTy; } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif + + bool onlyFirstLaneUsed(const VPValue *Op) const override; +}; + /// A recipe to wrap on original IR instruction not to be modified during /// execution, execept for PHIs. For PHIs, a single VPValue operand is allowed, /// and it is used to add a new incoming value for the single predecessor VPBB. @@ -1131,106 +1196,6 @@ class VPWidenRecipe : public VPRecipeWithIRFlags { #endif }; -/// VPWidenCastRecipe is a recipe to create vector cast instructions. -class VPWidenCastRecipe : public VPRecipeWithIRFlags { - /// Cast instruction opcode. - Instruction::CastOps Opcode; - - /// Result type for the cast.
- Type *ResultTy; - -public: - VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, - CastInst &UI) - : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), Opcode(Opcode), - ResultTy(ResultTy) { - assert(UI.getOpcode() == Opcode && - "opcode of underlying cast doesn't match"); - } - - VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy) - : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), Opcode(Opcode), - ResultTy(ResultTy) {} - - ~VPWidenCastRecipe() override = default; - - VPWidenCastRecipe *clone() override { - if (auto *UV = getUnderlyingValue()) - return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy, - *cast(UV)); - - return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy); - } - - VP_CLASSOF_IMPL(VPDef::VPWidenCastSC) - - /// Produce widened copies of the cast. - void execute(VPTransformState &State) override; - - /// Return the cost of this VPWidenCastRecipe. - InstructionCost computeCost(ElementCount VF, - VPCostContext &Ctx) const override; - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const override; -#endif - - Instruction::CastOps getOpcode() const { return Opcode; } - - /// Returns the result type of the cast. - Type *getResultType() const { return ResultTy; } -}; - -/// VPScalarCastRecipe is a recipe to create scalar cast instructions. -class VPScalarCastRecipe : public VPSingleDefRecipe { - Instruction::CastOps Opcode; - - Type *ResultTy; - - Value *generate(VPTransformState &State); - -public: - VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, - DebugLoc DL) - : VPSingleDefRecipe(VPDef::VPScalarCastSC, {Op}, DL), Opcode(Opcode), - ResultTy(ResultTy) {} - - ~VPScalarCastRecipe() override = default; - - VPScalarCastRecipe *clone() override { - return new VPScalarCastRecipe(Opcode, getOperand(0), ResultTy, - getDebugLoc()); - } - - VP_CLASSOF_IMPL(VPDef::VPScalarCastSC) - - void execute(VPTransformState &State) override; - - /// Return the cost of this VPScalarCastRecipe. - InstructionCost computeCost(ElementCount VF, - VPCostContext &Ctx) const override { - // TODO: Compute accurate cost after retiring the legacy cost model. - return 0; - } - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - void print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const override; -#endif - - /// Returns the result type of the cast. - Type *getResultType() const { return ResultTy; } - - bool onlyFirstLaneUsed(const VPValue *Op) const override { - // At the moment, only uniform codegen is implemented. - assert(is_contained(operands(), Op) && - "Op must be an operand of the recipe"); - return true; - } -}; - /// A recipe for widening vector intrinsics. class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags { /// ID of the vector intrinsic to widen. 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index 6f6875f0e5e0e..028aebd18cf53 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -252,20 +252,15 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) { VPPartialReductionRecipe>([this](const VPRecipeBase *R) { return inferScalarType(R->getOperand(0)); }) + .Case( + [](const auto *R) { return R->getResultType(); }) .Case( [this](const auto *R) { return inferScalarTypeForRecipe(R); }) - .Case([](const VPWidenIntrinsicRecipe *R) { - return R->getResultType(); - }) .Case([V](const VPInterleaveRecipe *R) { // TODO: Use info from interleave group. return V->getUnderlyingValue()->getType(); }) - .Case( - [](const VPWidenCastRecipe *R) { return R->getResultType(); }) - .Case( - [](const VPScalarCastRecipe *R) { return R->getResultType(); }) .Case([](const VPExpandSCEVRecipe *R) { return R->getSCEV()->getType(); }) diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index 8c11d93734667..3594b36bdee08 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -204,7 +204,7 @@ using UnaryVPInstruction_match = template using AllUnaryRecipe_match = UnaryRecipe_match; + VPInstruction>; template @@ -220,7 +220,7 @@ template using AllBinaryRecipe_match = BinaryRecipe_match; + VPReplicateRecipe, VPInstruction>; template inline UnaryVPInstruction_match diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index d154d54c37862..2a8e00dc649fa 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -79,7 +79,6 @@ bool VPRecipeBase::mayWriteToMemory() const { case VPReductionSC: case VPVectorPointerSC: case VPWidenCanonicalIVSC: - case VPWidenCastSC: case VPWidenGEPSC: case VPWidenIntOrFpInductionSC: case VPWidenLoadEVLSC: @@ -126,7 +125,6 @@ bool VPRecipeBase::mayReadFromMemory() const { case VPReductionSC: case VPVectorPointerSC: case VPWidenCanonicalIVSC: - case VPWidenCastSC: case VPWidenGEPSC: case VPWidenIntOrFpInductionSC: case VPWidenPHISC: @@ -148,7 +146,6 @@ bool VPRecipeBase::mayHaveSideEffects() const { switch (getVPDefID()) { case VPDerivedIVSC: case VPPredInstPHISC: - case VPScalarCastSC: case VPReverseVectorPointerSC: return false; case VPInstructionSC: @@ -165,7 +162,6 @@ bool VPRecipeBase::mayHaveSideEffects() const { case VPScalarIVStepsSC: case VPVectorPointerSC: case VPWidenCanonicalIVSC: - case VPWidenCastSC: case VPWidenGEPSC: case VPWidenIntOrFpInductionSC: case VPWidenPHISC: @@ -311,7 +307,7 @@ VPPartialReductionRecipe::computeCost(ElementCount VF, // The extend could come from outside the plan. 
if (!R) return TargetTransformInfo::PR_None; - auto *WidenCastR = dyn_cast<VPWidenCastRecipe>(R); + auto *WidenCastR = dyn_cast<VPInstructionWithType>(R); if (!WidenCastR) return TargetTransformInfo::PR_None; if (WidenCastR->getOpcode() == Instruction::CastOps::ZExt) @@ -413,7 +409,7 @@ bool VPInstruction::doesGeneratePerAllLanes() const { } bool VPInstruction::canGenerateScalarForFirstLane() const { - if (Instruction::isBinaryOp(getOpcode())) + if (Instruction::isBinaryOp(getOpcode()) || Instruction::isCast(getOpcode())) return true; if (isSingleScalar() || isVectorToScalar()) return true; @@ -810,7 +806,7 @@ void VPInstruction::execute(VPTransformState &State) { } bool VPInstruction::opcodeMayReadOrWriteFromMemory() const { - if (Instruction::isBinaryOp(getOpcode())) + if (Instruction::isBinaryOp(getOpcode()) || Instruction::isCast(getOpcode())) return false; switch (getOpcode()) { case Instruction::ICmp: @@ -961,6 +957,105 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, } #endif +void VPInstructionWithType::execute(VPTransformState &State) { + assert(Instruction::isCast(getOpcode()) && "must be cast"); + State.setDebugLocFrom(getDebugLoc()); + auto &Builder = State.Builder; + bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this); + Type *DestTy = OnlyFirstLaneUsed ? getResultType() + : VectorType::get(getResultType(), State.VF); + VPValue *Op = getOperand(0); + Value *A = OnlyFirstLaneUsed ? State.get(Op, VPLane(0)) : State.get(Op); + Value *Cast = + Builder.CreateCast(Instruction::CastOps(getOpcode()), A, DestTy); + if (OnlyFirstLaneUsed) + State.set(this, Cast, VPLane(0)); + else + State.set(this, Cast); + State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue())); + if (auto *CastOp = dyn_cast<Instruction>(Cast)) + setFlags(CastOp); +} + +InstructionCost VPInstructionWithType::computeCost(ElementCount VF, + VPCostContext &Ctx) const { + assert(Instruction::isCast(getOpcode()) && "must be cast"); + // TODO: In some cases, casts are created but not considered in + // the legacy cost model, including truncates/extends when evaluating a + // reduction in a smaller type. + if (!getUnderlyingValue()) + return 0; + + // Computes the CastContextHint from a recipe that may access memory. + auto ComputeCCH = [&](const VPRecipeBase *R) -> TTI::CastContextHint { + if (VF.isScalar()) + return TTI::CastContextHint::Normal; + if (isa<VPInterleaveRecipe>(R)) + return TTI::CastContextHint::Interleave; + if (const auto *ReplicateRecipe = dyn_cast<VPReplicateRecipe>(R)) + return ReplicateRecipe->isPredicated() ? TTI::CastContextHint::Masked + : TTI::CastContextHint::Normal; + const auto *WidenMemoryRecipe = dyn_cast<VPWidenMemoryRecipe>(R); + if (WidenMemoryRecipe == nullptr) + return TTI::CastContextHint::None; + if (!WidenMemoryRecipe->isConsecutive()) + return TTI::CastContextHint::GatherScatter; + if (WidenMemoryRecipe->isReverse()) + return TTI::CastContextHint::Reversed; + if (WidenMemoryRecipe->isMasked()) + return TTI::CastContextHint::Masked; + return TTI::CastContextHint::Normal; + }; + + VPValue *Operand = getOperand(0); + TTI::CastContextHint CCH = TTI::CastContextHint::None; + // For Trunc/FPTrunc, get the context from the only user. + if ((getOpcode() == Instruction::Trunc || + getOpcode() == Instruction::FPTrunc) && + !hasMoreThanOneUniqueUser() && getNumUsers() > 0) { + if (auto *StoreRecipe = dyn_cast<VPRecipeBase>(*user_begin())) + CCH = ComputeCCH(StoreRecipe); + } + // For Z/Sext, get the context from the operand.
+ else if (getOpcode() == Instruction::ZExt || + getOpcode() == Instruction::SExt || + getOpcode() == Instruction::FPExt) { + if (Operand->isLiveIn()) + CCH = TTI::CastContextHint::Normal; + else if (Operand->getDefiningRecipe()) + CCH = ComputeCCH(Operand->getDefiningRecipe()); + } + + auto *SrcTy = + cast(toVectorTy(Ctx.Types.inferScalarType(Operand), VF)); + auto *DestTy = cast(toVectorTy(getResultType(), VF)); + // Arm TTI will use the underlying instruction to determine the cost. + return Ctx.TTI.getCastInstrCost( + getOpcode(), DestTy, SrcTy, CCH, Ctx.CostKind, + dyn_cast_if_present(getUnderlyingValue())); +} + +bool VPInstructionWithType::onlyFirstLaneUsed(const VPValue *Op) const { + return vputils::onlyFirstLaneUsed(this); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPInstructionWithType::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "EMIT "; + printAsOperand(O, SlotTracker); + O << " = " << Instruction::getOpcodeName(getOpcode()); + printFlags(O); + printOperands(O, SlotTracker); + O << " to " << *getResultType(); + + if (auto DL = getDebugLoc()) { + O << ", !dbg "; + DL.print(O); + } +} +#endif + void VPIRInstruction::execute(VPTransformState &State) { assert((isa(&I) || getNumOperands() == 0) && "Only PHINodes can have extra operands"); @@ -1620,87 +1715,6 @@ void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent, } #endif -void VPWidenCastRecipe::execute(VPTransformState &State) { - State.setDebugLocFrom(getDebugLoc()); - auto &Builder = State.Builder; - /// Vectorize casts. - assert(State.VF.isVector() && "Not vectorizing?"); - Type *DestTy = VectorType::get(getResultType(), State.VF); - VPValue *Op = getOperand(0); - Value *A = State.get(Op); - Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy); - State.set(this, Cast); - State.addMetadata(Cast, cast_or_null(getUnderlyingValue())); - if (auto *CastOp = dyn_cast(Cast)) - setFlags(CastOp); -} - -InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF, - VPCostContext &Ctx) const { - // TODO: In some cases, VPWidenCastRecipes are created but not considered in - // the legacy cost model, including truncates/extends when evaluating a - // reduction in a smaller type. - if (!getUnderlyingValue()) - return 0; - // Computes the CastContextHint from a recipes that may access memory. - auto ComputeCCH = [&](const VPRecipeBase *R) -> TTI::CastContextHint { - if (VF.isScalar()) - return TTI::CastContextHint::Normal; - if (isa(R)) - return TTI::CastContextHint::Interleave; - if (const auto *ReplicateRecipe = dyn_cast(R)) - return ReplicateRecipe->isPredicated() ? TTI::CastContextHint::Masked - : TTI::CastContextHint::Normal; - const auto *WidenMemoryRecipe = dyn_cast(R); - if (WidenMemoryRecipe == nullptr) - return TTI::CastContextHint::None; - if (!WidenMemoryRecipe->isConsecutive()) - return TTI::CastContextHint::GatherScatter; - if (WidenMemoryRecipe->isReverse()) - return TTI::CastContextHint::Reversed; - if (WidenMemoryRecipe->isMasked()) - return TTI::CastContextHint::Masked; - return TTI::CastContextHint::Normal; - }; - - VPValue *Operand = getOperand(0); - TTI::CastContextHint CCH = TTI::CastContextHint::None; - // For Trunc/FPTrunc, get the context from the only user. 
- if ((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) && - !hasMoreThanOneUniqueUser() && getNumUsers() > 0) { - if (auto *StoreRecipe = dyn_cast(*user_begin())) - CCH = ComputeCCH(StoreRecipe); - } - // For Z/Sext, get the context from the operand. - else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt || - Opcode == Instruction::FPExt) { - if (Operand->isLiveIn()) - CCH = TTI::CastContextHint::Normal; - else if (Operand->getDefiningRecipe()) - CCH = ComputeCCH(Operand->getDefiningRecipe()); - } - - auto *SrcTy = - cast(toVectorTy(Ctx.Types.inferScalarType(Operand), VF)); - auto *DestTy = cast(toVectorTy(getResultType(), VF)); - // Arm TTI will use the underlying instruction to determine the cost. - return Ctx.TTI.getCastInstrCost( - Opcode, DestTy, SrcTy, CCH, Ctx.CostKind, - dyn_cast_if_present(getUnderlyingValue())); -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const { - O << Indent << "WIDEN-CAST "; - printAsOperand(O, SlotTracker); - O << " = " << Instruction::getOpcodeName(Opcode); - printFlags(O); - printOperands(O, SlotTracker); - O << " to " << *getResultType(); -} -#endif - InstructionCost VPHeaderPHIRecipe::computeCost(ElementCount VF, VPCostContext &Ctx) const { return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind); @@ -2436,38 +2450,6 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent, } #endif -Value *VPScalarCastRecipe ::generate(VPTransformState &State) { - State.setDebugLocFrom(getDebugLoc()); - assert(vputils::onlyFirstLaneUsed(this) && - "Codegen only implemented for first lane."); - switch (Opcode) { - case Instruction::SExt: - case Instruction::ZExt: - case Instruction::Trunc: { - // Note: SExt/ZExt not used yet. 
- Value *Op = State.get(getOperand(0), VPLane(0)); - return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy); - } - default: - llvm_unreachable("opcode not implemented yet"); - } -} - -void VPScalarCastRecipe ::execute(VPTransformState &State) { - State.set(this, generate(State), VPLane(0)); -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const { - O << Indent << "SCALAR-CAST "; - printAsOperand(O, SlotTracker); - O << " = " << Instruction::getOpcodeName(Opcode) << " "; - printOperands(O, SlotTracker); - O << " to " << *ResultTy; -} -#endif - void VPBranchOnMaskRecipe::execute(VPTransformState &State) { State.setDebugLocFrom(getDebugLoc()); assert(State.Lane && "Branch on Mask works only on single instance."); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 7646350ca0ed2..27bd2be555045 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -90,8 +90,9 @@ void VPlanTransforms::VPInstructionsToVPRecipes( } else if (SelectInst *SI = dyn_cast(Inst)) { NewRecipe = new VPWidenSelectRecipe(*SI, Ingredient.operands()); } else if (auto *CI = dyn_cast(Inst)) { - NewRecipe = new VPWidenCastRecipe( - CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), *CI); + NewRecipe = new VPInstructionWithType( + CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), {}); + NewRecipe->getVPSingleValue()->setUnderlyingValue(CI); } else { NewRecipe = new VPWidenRecipe(*Inst, Ingredient.operands()); } @@ -552,7 +553,7 @@ createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind, assert(ResultTy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits() && "Not truncating."); assert(ResultTy->isIntegerTy() && "Truncation requires an integer type"); - BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy, DL); + BaseIV = Builder.createCast(Instruction::Trunc, BaseIV, TruncTy, DL); ResultTy = TruncTy; } @@ -566,7 +567,7 @@ createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind, cast(HeaderVPBB->getSingleHierarchicalPredecessor()); VPBuilder::InsertPointGuard Guard(Builder); Builder.setInsertPoint(VecPreheader); - Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy, DL); + Step = Builder.createCast(Instruction::Trunc, Step, ResultTy, DL); } return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step); } @@ -927,8 +928,8 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue())) ? Instruction::SExt : Instruction::ZExt; - auto *VPC = - new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy); + auto *VPC = new VPInstructionWithType(Instruction::CastOps(ExtOpcode), + A, TruncTy, {}); if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) { // UnderlyingExt has distinct return type, used to retain legacy cost. 
VPC->setUnderlyingValue(UnderlyingExt); @@ -936,7 +937,8 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { VPC->insertBefore(&R); Trunc->replaceAllUsesWith(VPC); } else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) { - auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy); + auto *VPC = + new VPInstructionWithType(Instruction::Trunc, A, TruncTy, {}); VPC->insertBefore(&R); Trunc->replaceAllUsesWith(VPC); } @@ -1336,14 +1338,14 @@ void VPlanTransforms::truncateToMinimalBitwidths( // cannot use RAUW after creating a new truncate, as this would could make // other uses have different types for their operands, making them invalidly // typed. - DenseMap ProcessedTruncs; + DenseMap ProcessedTruncs; Type *CanonicalIVType = Plan.getCanonicalIV()->getScalarType(); VPTypeAnalysis TypeInfo(CanonicalIVType); VPBasicBlock *PH = Plan.getVectorPreheader(); for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly( vp_depth_first_deep(Plan.getVectorLoopRegion()))) { for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { - if (!isa(&R)) continue; @@ -1360,7 +1362,7 @@ void VPlanTransforms::truncateToMinimalBitwidths( // type. Skip those here, after incrementing NumProcessedRecipes. Also // skip casts which do not need to be handled explicitly here, as // redundant casts will be removed during recipe simplification. - if (isa(&R)) { + if (isa(&R)) { #ifndef NDEBUG // If any of the operands is a live-in and not used by VPWidenRecipe or // VPWidenSelectRecipe, but in MinBWs, make sure it is counted as @@ -1404,8 +1406,8 @@ void VPlanTransforms::truncateToMinimalBitwidths( if (OldResSizeInBits != NewResSizeInBits && !match(&R, m_Binary(m_VPValue(), m_VPValue()))) { // Extend result to original width. - auto *Ext = - new VPWidenCastRecipe(Instruction::ZExt, ResultVPV, OldResTy); + auto *Ext = new VPInstructionWithType(Instruction::ZExt, {ResultVPV}, + OldResTy, {}); Ext->insertAfter(&R); ResultVPV->replaceAllUsesWith(Ext); Ext->setOperand(0, ResultVPV); @@ -1431,10 +1433,10 @@ void VPlanTransforms::truncateToMinimalBitwidths( assert(OpSizeInBits > NewResSizeInBits && "nothing to truncate"); auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.insert({Op, nullptr}); - VPWidenCastRecipe *NewOp = - IterIsEmpty - ? new VPWidenCastRecipe(Instruction::Trunc, Op, NewResTy) - : ProcessedIter->second; + VPInstructionWithType *NewOp = + IterIsEmpty ? new VPInstructionWithType(Instruction::Trunc, Op, + NewResTy, {}) + : ProcessedIter->second; R.setOperand(Idx, NewOp); if (!IterIsEmpty) continue; @@ -1749,9 +1751,9 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) { TypeInfo.inferScalarType(MaxEVL)->getScalarSizeInBits(); VFSize != 32) { VPBuilder Builder(LoopRegion->getPreheaderVPBB()); - MaxEVL = Builder.createScalarCast( - VFSize > 32 ? Instruction::Trunc : Instruction::ZExt, MaxEVL, - Type::getInt32Ty(Ctx), DebugLoc()); + MaxEVL = Builder.createCast(VFSize > 32 ? Instruction::Trunc + : Instruction::ZExt, + MaxEVL, Type::getInt32Ty(Ctx), DebugLoc()); } PrevEVL = new VPScalarPHIRecipe(MaxEVL, &EVL, DebugLoc(), "prev.evl"); PrevEVL->insertBefore(*Header, Header->getFirstNonPhi()); @@ -1875,7 +1877,7 @@ bool VPlanTransforms::tryAddExplicitVectorLength( VPSingleDefRecipe *OpVPEVL = VPEVL; if (unsigned IVSize = CanonicalIVPHI->getScalarType()->getScalarSizeInBits(); IVSize != 32) { - OpVPEVL = Builder.createScalarCast( + OpVPEVL = Builder.createCast( IVSize < 32 ? 
Instruction::Trunc : Instruction::ZExt, OpVPEVL, CanonicalIVPHI->getScalarType(), CanonicalIVIncrement->getDebugLoc()); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index 89e372d6b46cf..092949ae0a5ed 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -160,7 +160,7 @@ void UnrollState::unrollWidenInductionByUF( if (TypeInfo.inferScalarType(VectorStep) != IVTy) { Instruction::CastOps CastOp = IVTy->isFloatingPointTy() ? Instruction::UIToFP : Instruction::Trunc; - VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy); + VectorStep = Builder.createCast(CastOp, VectorStep, IVTy); ToSkip.insert(VectorStep->getDefiningRecipe()); } @@ -170,8 +170,7 @@ void UnrollState::unrollWidenInductionByUF( : nullptr; if (!ConstStep || ConstStep->getValue() != 1) { if (TypeInfo.inferScalarType(ScalarStep) != IVTy) { - ScalarStep = - Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy); + ScalarStep = Builder.createCast(Instruction::Trunc, ScalarStep, IVTy); ToSkip.insert(ScalarStep->getDefiningRecipe()); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp index 1a7322ec0aff6..c9063ae911ef3 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp @@ -113,9 +113,13 @@ bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) { all_of(R->operands(), [](VPValue *Op) { return isUniformAcrossVFsAndUFs(Op); }); }) - .Case([](const auto *R) { - // A cast is uniform according to its operand. - return isUniformAcrossVFsAndUFs(R->getOperand(0)); + .Case([](const auto *VPI) { + return Instruction::isCast(VPI->getOpcode()) + ? all_of(VPI->operands(), + [](VPValue *Op) { + return isUniformAcrossVFsAndUFs(Op); + }) + : false; }) .Default([](const VPRecipeBase *) { // A value is considered non-uniform // unless proven otherwise. 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 0a59b137bbd79..f17729f0f153d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -332,13 +332,11 @@ class VPDef { VPReductionSC, VPPartialReductionSC, VPReplicateSC, - VPScalarCastSC, VPScalarIVStepsSC, VPVectorPointerSC, VPReverseVectorPointerSC, VPWidenCallSC, VPWidenCanonicalIVSC, - VPWidenCastSC, VPWidenGEPSC, VPWidenIntrinsicSC, VPWidenLoadEVLSC, diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index 1b3b69ea6a13d..882323d5a1e4f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -146,8 +146,8 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const { .Case( [&](const VPRecipeBase *R) { return VerifyEVLUse(*R, 1); }) - .Case( - [&](const VPScalarCastRecipe *S) { return VerifyEVLUse(*S, 0); }) + .Case( + [&](const auto *S) { return VerifyEVLUse(*S, 0); }) .Case([&](const VPInstruction *I) { if (I->getOpcode() != Instruction::Add) { errs() << "EVL is used as an operand in non-VPInstruction::Add\n"; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-fp-ext-trunc-illegal-type.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-fp-ext-trunc-illegal-type.ll index 7d71ff87ec8be..cea35376af18b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-fp-ext-trunc-illegal-type.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-fp-ext-trunc-illegal-type.ll @@ -9,7 +9,7 @@ target triple = "aarch64-unknown-linux-gnu" ;; registers required for a when trying to maximize ;; vector bandwidth with SVE. -; CHECK: Cost of Invalid for VF vscale x 2: WIDEN-CAST ir<%load.ext> = fpext ir<%load.in> to fp128 +; CHECK: Cost of Invalid for VF vscale x 2: EMIT ir<%load.ext> = fpext ir<%load.in> to fp128 define void @load_ext_trunc_store(ptr readonly %in, ptr noalias %out, i64 %N) { ; CHECK-LABEL: define void @load_ext_trunc_store( diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll index 90ef2da3d1637..7cbb54d655c8c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll @@ -72,7 +72,7 @@ for.end: ; preds = %for.body ; DEBUG: Cost of Invalid for VF vscale x 1: induction instruction %indvars.iv.next1295 = add i3 %indvars.iv1294, 1 ; DEBUG: Cost of Invalid for VF vscale x 1: induction instruction %indvars.iv1294 = phi i3 [ %indvars.iv.next1295, %for.body ], [ 0, %entry ] -; DEBUG: Cost of Invalid for VF vscale x 1: WIDEN-CAST ir<%zexti3> = zext ir<%indvars.iv1294> to i64 +; DEBUG: Cost of Invalid for VF vscale x 1: EMIT ir<%zexti3> = zext ir<%indvars.iv1294> to i64 define void @induction_i3_zext(ptr %dst) #0 { ; CHECK-LABEL: define void @induction_i3_zext( diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll index 55f82fd55daf4..ede528f76d261 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll @@ -76,7 +76,7 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: CLONE [[GEP_IDX:.*]] = getelementptr inbounds ir<%indices>, [[STEPS]] ; CHECK-NEXT: [[VECP_IDX:vp.*]] = 
vector-pointer [[GEP_IDX]] ; CHECK-NEXT: WIDEN [[IDX:.*]] = load [[VECP_IDX]] -; CHECK-NEXT: WIDEN-CAST [[EXT_IDX:.*]] = zext [[IDX]] to i64 +; CHECK-NEXT: EMIT [[EXT_IDX:.*]] = zext [[IDX]] to i64 ; CHECK-NEXT: WIDEN-GEP Inv[Var] [[GEP_BUCKET:.*]] = getelementptr inbounds ir<%buckets>, [[EXT_IDX]] ; CHECK-NEXT: WIDEN-HISTOGRAM buckets: [[GEP_BUCKET]], inc: ir<1> ; CHECK-NEXT: EMIT [[IV_NEXT]] = add nuw [[IV]], [[VFxUF]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll index 7bc606f5c61b3..17e5fb3e827bf 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll @@ -8,14 +8,14 @@ target triple = "aarch64-unknown-linux-gnu" define void @zext_i8_i16(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 { ; CHECK-COST-LABEL: LV: Checking a loop in 'zext_i8_i16' -; CHECK-COST: Cost of 1 for VF 2: WIDEN-CAST ir<%conv> = zext ir<%0> to i16 -; CHECK-COST: Cost of 1 for VF 4: WIDEN-CAST ir<%conv> = zext ir<%0> to i16 -; CHECK-COST: Cost of 1 for VF 8: WIDEN-CAST ir<%conv> = zext ir<%0> to i16 -; CHECK-COST: Cost of 2 for VF 16: WIDEN-CAST ir<%conv> = zext ir<%0> to i16 -; CHECK-COST: Cost of 1 for VF vscale x 1: WIDEN-CAST ir<%conv> = zext ir<%0> to i16 -; CHECK-COST: Cost of 1 for VF vscale x 2: WIDEN-CAST ir<%conv> = zext ir<%0> to i16 -; CHECK-COST: Cost of 1 for VF vscale x 4: WIDEN-CAST ir<%conv> = zext ir<%0> to i16 -; CHECK-COST: Cost of 0 for VF vscale x 8: WIDEN-CAST ir<%conv> = zext ir<%0> to i16 +; CHECK-COST: Cost of 1 for VF 2: EMIT ir<%conv> = zext ir<%0> to i16 +; CHECK-COST: Cost of 1 for VF 4: EMIT ir<%conv> = zext ir<%0> to i16 +; CHECK-COST: Cost of 1 for VF 8: EMIT ir<%conv> = zext ir<%0> to i16 +; CHECK-COST: Cost of 2 for VF 16: EMIT ir<%conv> = zext ir<%0> to i16 +; CHECK-COST: Cost of 1 for VF vscale x 1: EMIT ir<%conv> = zext ir<%0> to i16 +; CHECK-COST: Cost of 1 for VF vscale x 2: EMIT ir<%conv> = zext ir<%0> to i16 +; CHECK-COST: Cost of 1 for VF vscale x 4: EMIT ir<%conv> = zext ir<%0> to i16 +; CHECK-COST: Cost of 0 for VF vscale x 8: EMIT ir<%conv> = zext ir<%0> to i16 ; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv = zext i8 %0 to i32 ; CHECK-LABEL: define void @zext_i8_i16 ; CHECK-SAME: (ptr noalias readonly captures(none) [[P:%.*]], ptr noalias captures(none) [[Q:%.*]], i32 [[LEN:%.*]]) #[[ATTR0:[0-9]+]] { @@ -85,14 +85,14 @@ exit: ; preds = %for.body define void @sext_i8_i16(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 { ; CHECK-COST-LABEL: LV: Checking a loop in 'sext_i8_i16' -; CHECK-COST: Cost of 1 for VF 2: WIDEN-CAST ir<%conv> = sext ir<%0> to i16 -; CHECK-COST: Cost of 1 for VF 4: WIDEN-CAST ir<%conv> = sext ir<%0> to i16 -; CHECK-COST: Cost of 1 for VF 8: WIDEN-CAST ir<%conv> = sext ir<%0> to i16 -; CHECK-COST: Cost of 2 for VF 16: WIDEN-CAST ir<%conv> = sext ir<%0> to i16 -; CHECK-COST: Cost of 1 for VF vscale x 1: WIDEN-CAST ir<%conv> = sext ir<%0> to i16 -; CHECK-COST: Cost of 1 for VF vscale x 2: WIDEN-CAST ir<%conv> = sext ir<%0> to i16 -; CHECK-COST: Cost of 1 for VF vscale x 4: WIDEN-CAST ir<%conv> = sext ir<%0> to i16 -; CHECK-COST: Cost of 0 for VF vscale x 8: WIDEN-CAST ir<%conv> = sext ir<%0> to i16 +; CHECK-COST: Cost of 1 for VF 2: EMIT ir<%conv> = sext ir<%0> to i16 +; CHECK-COST: Cost of 1 for VF 4: EMIT ir<%conv> = sext ir<%0> to i16 +; CHECK-COST: Cost 
of 1 for VF 8: EMIT ir<%conv> = sext ir<%0> to i16 +; CHECK-COST: Cost of 2 for VF 16: EMIT ir<%conv> = sext ir<%0> to i16 +; CHECK-COST: Cost of 1 for VF vscale x 1: EMIT ir<%conv> = sext ir<%0> to i16 +; CHECK-COST: Cost of 1 for VF vscale x 2: EMIT ir<%conv> = sext ir<%0> to i16 +; CHECK-COST: Cost of 1 for VF vscale x 4: EMIT ir<%conv> = sext ir<%0> to i16 +; CHECK-COST: Cost of 0 for VF vscale x 8: EMIT ir<%conv> = sext ir<%0> to i16 ; CHECK-LABEL: define void @sext_i8_i16 ; CHECK-SAME: (ptr noalias readonly captures(none) [[P:%.*]], ptr noalias captures(none) [[Q:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll index a880bea2c52d1..85eccf179749e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll @@ -26,11 +26,11 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) { ; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[PTR_A:%.+]]> = vector-pointer ir<%gep.a> ; CHECK-NEXT: WIDEN ir<%load.a> = load vp<[[PTR_A]]> -; CHECK-NEXT: WIDEN-CAST ir<%ext.a> = zext ir<%load.a> to i32 +; CHECK-NEXT: EMIT ir<%ext.a> = zext ir<%load.a> to i32 ; CHECK-NEXT: CLONE ir<%gep.b> = getelementptr ir<%b>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[PTR_B:%.+]]> = vector-pointer ir<%gep.b> ; CHECK-NEXT: WIDEN ir<%load.b> = load vp<[[PTR_B]]> -; CHECK-NEXT: WIDEN-CAST ir<%ext.b> = zext ir<%load.b> to i32 +; CHECK-NEXT: EMIT ir<%ext.b> = zext ir<%load.b> to i32 ; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%ext.b>, ir<%ext.a> ; CHECK-NEXT: PARTIAL-REDUCE ir<[[REDUCE]]> = add ir<%mul>, ir<[[ACC]]> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> @@ -92,11 +92,11 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) { ; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[PTR_A:%.+]]> = vector-pointer ir<%gep.a> ; CHECK-NEXT: WIDEN ir<%load.a> = load vp<[[PTR_A]]> -; CHECK-NEXT: WIDEN-CAST ir<%ext.a> = zext ir<%load.a> to i32 +; CHECK-NEXT: EMIT ir<%ext.a> = zext ir<%load.a> to i32 ; CHECK-NEXT: CLONE ir<%gep.b> = getelementptr ir<%b>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[PTR_B:%.+]]> = vector-pointer ir<%gep.b> ; CHECK-NEXT: WIDEN ir<%load.b> = load vp<[[PTR_B]]> -; CHECK-NEXT: WIDEN-CAST ir<%ext.b> = zext ir<%load.b> to i32 +; CHECK-NEXT: EMIT ir<%ext.b> = zext ir<%load.b> to i32 ; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%ext.b>, ir<%ext.a> ; CHECK-NEXT: PARTIAL-REDUCE ir<%add> = add ir<%mul>, ir<%accum> ; CHECK-NEXT: EMIT vp<[[EP_IV_NEXT:%.+]]> = add nuw vp<[[EP_IV]]>, ir<16> diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll b/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll index a119707bed120..1339d8fba7ea5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll @@ -25,7 +25,7 @@ target triple = "arm64-apple-ios" ; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr inbounds ir<%src>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.src> ; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]> -; CHECK-NEXT: WIDEN-CAST ir<%conv> = fpext ir<%l> to double +; CHECK-NEXT: EMIT ir<%conv> = fpext ir<%l> to double ; CHECK-NEXT: WIDEN-CALL ir<%s> = call reassoc nnan ninf nsz arcp contract afn 
@llvm.sin.f64(ir<%conv>) (using library function: __simd_sin_v2f64) ; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr inbounds ir<%dst>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE store ir<%s>, ir<%gep.dst> @@ -71,7 +71,7 @@ target triple = "arm64-apple-ios" ; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr inbounds ir<%src>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.src> ; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]> -; CHECK-NEXT: WIDEN-CAST ir<%conv> = fpext ir<%l> to double +; CHECK-NEXT: EMIT ir<%conv> = fpext ir<%l> to double ; CHECK-NEXT: WIDEN-INTRINSIC ir<%s> = call reassoc nnan ninf nsz arcp contract afn llvm.sin(ir<%conv>) ; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr inbounds ir<%dst>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE store ir<%s>, ir<%gep.dst> diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll index b22910316d7cb..064a92c66714a 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll @@ -27,7 +27,7 @@ target triple = "thumbv8.1m.main-arm-none-eabi" ; CHECK: Cost of 0 for VF 2: CLONE ir<%arrayidx> = getelementptr inbounds ir<%s>, vp<%3> ; CHECK: Cost of 0 for VF 2: vp<%4> = vector-pointer ir<%arrayidx> ; CHECK: Cost of 18 for VF 2: WIDEN ir<%1> = load vp<%4> -; CHECK: Cost of 4 for VF 2: WIDEN-CAST ir<%conv> = sext ir<%1> to i32 +; CHECK: Cost of 4 for VF 2: EMIT ir<%conv> = sext ir<%1> to i32 ; CHECK: Cost of 20 for VF 2: WIDEN ir<%cmp2> = icmp sgt ir<%conv>, ir<%conv1> ; CHECK: Cost of 26 for VF 2: WIDEN ir<%conv6> = add ir<%1>, ir<%0> ; CHECK: Cost of 0 for VF 2: CLONE ir<%arrayidx7> = getelementptr ir<%d>, vp<%3> @@ -44,7 +44,7 @@ target triple = "thumbv8.1m.main-arm-none-eabi" ; CHECK: Cost of 0 for VF 4: CLONE ir<%arrayidx> = getelementptr inbounds ir<%s>, vp<%3> ; CHECK: Cost of 0 for VF 4: vp<%4> = vector-pointer ir<%arrayidx> ; CHECK: Cost of 2 for VF 4: WIDEN ir<%1> = load vp<%4> -; CHECK: Cost of 0 for VF 4: WIDEN-CAST ir<%conv> = sext ir<%1> to i32 +; CHECK: Cost of 0 for VF 4: EMIT ir<%conv> = sext ir<%1> to i32 ; CHECK: Cost of 2 for VF 4: WIDEN ir<%cmp2> = icmp sgt ir<%conv>, ir<%conv1> ; CHECK: Cost of 2 for VF 4: WIDEN ir<%conv6> = add ir<%1>, ir<%0> ; CHECK: Cost of 0 for VF 4: CLONE ir<%arrayidx7> = getelementptr ir<%d>, vp<%3> @@ -61,7 +61,7 @@ target triple = "thumbv8.1m.main-arm-none-eabi" ; CHECK: Cost of 0 for VF 8: CLONE ir<%arrayidx> = getelementptr inbounds ir<%s>, vp<%3> ; CHECK: Cost of 0 for VF 8: vp<%4> = vector-pointer ir<%arrayidx> ; CHECK: Cost of 2 for VF 8: WIDEN ir<%1> = load vp<%4> -; CHECK: Cost of 2 for VF 8: WIDEN-CAST ir<%conv> = sext ir<%1> to i32 +; CHECK: Cost of 2 for VF 8: EMIT ir<%conv> = sext ir<%1> to i32 ; CHECK: Cost of 36 for VF 8: WIDEN ir<%cmp2> = icmp sgt ir<%conv>, ir<%conv1> ; CHECK: Cost of 2 for VF 8: WIDEN ir<%conv6> = add ir<%1>, ir<%0> ; CHECK: Cost of 0 for VF 8: CLONE ir<%arrayidx7> = getelementptr ir<%d>, vp<%3> @@ -144,15 +144,15 @@ for.inc: ; preds = %for.body, %if.then ; CHECK: Cost of 0 for VF 2: EMIT vp<%next.gep>.2 = ptradd ir<%pSrcB>, vp<[[STEPS3]]> ; CHECK: Cost of 0 for VF 2: vp<[[VEC_PTR:%.+]]> = vector-pointer vp<%next.gep> ; CHECK: Cost of 18 for VF 2: WIDEN ir<%0> = load vp<[[VEC_PTR]]> -; CHECK: Cost of 4 for VF 2: WIDEN-CAST ir<%conv1> = sext ir<%0> to i32 +; CHECK: Cost of 4 for VF 2: EMIT ir<%conv1> = sext ir<%0> to i32 ; CHECK: Cost of 0 for VF 2: vp<[[VEC_PTR2:%.+]]> = vector-pointer vp<%next.gep>.2 ; CHECK: 
Cost of 18 for VF 2: WIDEN ir<%1> = load vp<[[VEC_PTR2]]> -; CHECK: Cost of 4 for VF 2: WIDEN-CAST ir<%conv3> = sext ir<%1> to i32 +; CHECK: Cost of 4 for VF 2: EMIT ir<%conv3> = sext ir<%1> to i32 ; CHECK: Cost of 26 for VF 2: WIDEN ir<%mul> = mul nsw ir<%conv3>, ir<%conv1> ; CHECK: Cost of 18 for VF 2: WIDEN ir<%shr> = ashr ir<%mul>, ir<7> ; CHECK: Cost of 0 for VF 2: WIDEN ir<%2> = icmp slt ir<%shr>, ir<127> ; CHECK: Cost of 22 for VF 2: WIDEN-SELECT ir<%spec.select.i> = select ir<%2>, ir<%shr>, ir<127> -; CHECK: Cost of 0 for VF 2: WIDEN-CAST ir<%conv4> = trunc ir<%spec.select.i> to i8 +; CHECK: Cost of 0 for VF 2: EMIT ir<%conv4> = trunc ir<%spec.select.i> to i8 ; CHECK: Cost of 0 for VF 2: vp<[[VEC_PTR3:%.+]]> = vector-pointer vp<%next.gep>.1 ; CHECK: Cost of 18 for VF 2: WIDEN store vp<[[VEC_PTR3]]>, ir<%conv4> ; CHECK: Cost of 0 for VF 2: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<%0> @@ -176,15 +176,15 @@ for.inc: ; preds = %for.body, %if.then ; CHECK: Cost of 0 for VF 4: EMIT vp<%next.gep>.2 = ptradd ir<%pSrcB>, vp<[[STEPS3]]> ; CHECK: Cost of 0 for VF 4: vp<[[VEC_PTR1:%.+]]> = vector-pointer vp<%next.gep> ; CHECK: Cost of 2 for VF 4: WIDEN ir<%0> = load vp<[[VEC_PTR1]]> -; CHECK: Cost of 0 for VF 4: WIDEN-CAST ir<%conv1> = sext ir<%0> to i32 +; CHECK: Cost of 0 for VF 4: EMIT ir<%conv1> = sext ir<%0> to i32 ; CHECK: Cost of 0 for VF 4: vp<[[VEC_PTR2:%.+]]> = vector-pointer vp<%next.gep>.2 ; CHECK: Cost of 2 for VF 4: WIDEN ir<%1> = load vp<[[VEC_PTR2]]> -; CHECK: Cost of 0 for VF 4: WIDEN-CAST ir<%conv3> = sext ir<%1> to i32 +; CHECK: Cost of 0 for VF 4: EMIT ir<%conv3> = sext ir<%1> to i32 ; CHECK: Cost of 2 for VF 4: WIDEN ir<%mul> = mul nsw ir<%conv3>, ir<%conv1> ; CHECK: Cost of 2 for VF 4: WIDEN ir<%shr> = ashr ir<%mul>, ir<7> ; CHECK: Cost of 0 for VF 4: WIDEN ir<%2> = icmp slt ir<%shr>, ir<127> ; CHECK: Cost of 2 for VF 4: WIDEN-SELECT ir<%spec.select.i> = select ir<%2>, ir<%shr>, ir<127> -; CHECK: Cost of 0 for VF 4: WIDEN-CAST ir<%conv4> = trunc ir<%spec.select.i> to i8 +; CHECK: Cost of 0 for VF 4: EMIT ir<%conv4> = trunc ir<%spec.select.i> to i8 ; CHECK: Cost of 0 for VF 4: vp<[[VEC_PTR2:%.+]]> = vector-pointer vp<%next.gep>.1 ; CHECK: Cost of 2 for VF 4: WIDEN store vp<[[VEC_PTR2]]>, ir<%conv4> ; CHECK: Cost of 0 for VF 4: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<%0> @@ -208,15 +208,15 @@ for.inc: ; preds = %for.body, %if.then ; CHECK: Cost of 0 for VF 8: EMIT vp<%next.gep>.2 = ptradd ir<%pSrcB>, vp<[[STEPS3]]> ; CHECK: Cost of 0 for VF 8: vp<[[VEC_PTR1:%.+]]> = vector-pointer vp<%next.gep> ; CHECK: Cost of 2 for VF 8: WIDEN ir<%0> = load vp<[[VEC_PTR1]]> -; CHECK: Cost of 2 for VF 8: WIDEN-CAST ir<%conv1> = sext ir<%0> to i32 +; CHECK: Cost of 2 for VF 8: EMIT ir<%conv1> = sext ir<%0> to i32 ; CHECK: Cost of 0 for VF 8: vp<[[VEC_PTR2:%.+]]> = vector-pointer vp<%next.gep>.2 ; CHECK: Cost of 2 for VF 8: WIDEN ir<%1> = load vp<[[VEC_PTR2]]> -; CHECK: Cost of 2 for VF 8: WIDEN-CAST ir<%conv3> = sext ir<%1> to i32 +; CHECK: Cost of 2 for VF 8: EMIT ir<%conv3> = sext ir<%1> to i32 ; CHECK: Cost of 4 for VF 8: WIDEN ir<%mul> = mul nsw ir<%conv3>, ir<%conv1> ; CHECK: Cost of 4 for VF 8: WIDEN ir<%shr> = ashr ir<%mul>, ir<7> ; CHECK: Cost of 0 for VF 8: WIDEN ir<%2> = icmp slt ir<%shr>, ir<127> ; CHECK: Cost of 4 for VF 8: WIDEN-SELECT ir<%spec.select.i> = select ir<%2>, ir<%shr>, ir<127> -; CHECK: Cost of 2 for VF 8: WIDEN-CAST ir<%conv4> = trunc ir<%spec.select.i> to i8 +; CHECK: Cost of 2 for VF 8: EMIT ir<%conv4> = trunc ir<%spec.select.i> to i8 ; CHECK: 
Cost of 0 for VF 8: vp<[[VEC_PTR3:%.+]]> = vector-pointer vp<%next.gep>.1
; CHECK: Cost of 2 for VF 8: WIDEN store vp<[[VEC_PTR3]]>, ir<%conv4>
; CHECK: Cost of 0 for VF 8: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<{{.+}}
@@ -240,15 +240,15 @@ for.inc: ; preds = %for.body, %if.then
; CHECK: Cost of 0 for VF 16: EMIT vp<%next.gep>.2 = ptradd ir<%pSrcB>, vp<[[STEPS3]]>
; CHECK: Cost of 0 for VF 16: vp<[[VEC_PTR:%.+]]> = vector-pointer vp<%next.gep>
; CHECK: Cost of 2 for VF 16: WIDEN ir<%0> = load vp<[[VEC_PTR]]>
-; CHECK: Cost of 6 for VF 16: WIDEN-CAST ir<%conv1> = sext ir<%0> to i32
+; CHECK: Cost of 6 for VF 16: EMIT ir<%conv1> = sext ir<%0> to i32
; CHECK: Cost of 0 for VF 16: vp<[[VEC_PTR1:%.+]]> = vector-pointer vp<%next.gep>.2
; CHECK: Cost of 2 for VF 16: WIDEN ir<%1> = load vp<[[VEC_PTR1]]>
-; CHECK: Cost of 6 for VF 16: WIDEN-CAST ir<%conv3> = sext ir<%1> to i32
+; CHECK: Cost of 6 for VF 16: EMIT ir<%conv3> = sext ir<%1> to i32
; CHECK: Cost of 8 for VF 16: WIDEN ir<%mul> = mul nsw ir<%conv3>, ir<%conv1>
; CHECK: Cost of 8 for VF 16: WIDEN ir<%shr> = ashr ir<%mul>, ir<7>
; CHECK: Cost of 0 for VF 16: WIDEN ir<%2> = icmp slt ir<%shr>, ir<127>
; CHECK: Cost of 8 for VF 16: WIDEN-SELECT ir<%spec.select.i> = select ir<%2>, ir<%shr>, ir<127>
-; CHECK: Cost of 6 for VF 16: WIDEN-CAST ir<%conv4> = trunc ir<%spec.select.i> to i8
+; CHECK: Cost of 6 for VF 16: EMIT ir<%conv4> = trunc ir<%spec.select.i> to i8
; CHECK: Cost of 0 for VF 16: vp<[[VEC_PTR2:%.+]]> = vector-pointer vp<%next.gep>.1
; CHECK: Cost of 2 for VF 16: WIDEN store vp<[[VEC_PTR2]]>, ir<%conv4>
; CHECK: Cost of 0 for VF 16: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<{{.+}}>
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll
index a213608857728..2e8bc164e7b60 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll
@@ -34,7 +34,7 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[SMAX]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -90,7 +90,7 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[SMIN]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -146,7 +146,7 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[UMAX]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -202,7 +202,7 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[UMIN]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -255,7 +255,7 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[CTLZ]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -306,7 +306,7 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[CTTZ]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -353,13 +353,13 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-CAST ir<[[FPEXT:%.+]]> = fpext ir<[[LD1]]> to double
+; IF-EVL-NEXT: EMIT ir<[[FPEXT:%.+]]> = fpext ir<[[LD1]]> to double
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[LRINT:%.+]]> = call llvm.lrint(ir<[[FPEXT]]>)
-; IF-EVL-NEXT: WIDEN-CAST ir<[[TRUNC:%.+]]> = trunc ir<[[LRINT]]> to i32
+; IF-EVL-NEXT: EMIT ir<[[TRUNC:%.+]]> = trunc ir<[[LRINT]]> to i32
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[TRUNC]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -408,13 +408,13 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-CAST ir<[[FPEXT:%.+]]> = fpext ir<[[LD1]]> to double
+; IF-EVL-NEXT: EMIT ir<[[FPEXT:%.+]]> = fpext ir<[[LD1]]> to double
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[LLRINT:%.+]]> = call llvm.llrint(ir<[[FPEXT]]>)
-; IF-EVL-NEXT: WIDEN-CAST ir<[[TRUNC:%.+]]> = trunc ir<[[LLRINT]]> to i32
+; IF-EVL-NEXT: EMIT ir<[[TRUNC:%.+]]> = trunc ir<[[LLRINT]]> to i32
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[TRUNC]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -467,7 +467,7 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[ABS]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
index b8f20aa670b5c..5d11f217dd3e4 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
@@ -26,11 +26,11 @@ define void @vp_sext(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-CAST ir<[[SEXT:%.+]]> = sext ir<[[LD1]]> to i64
+; IF-EVL-NEXT: EMIT ir<[[SEXT:%.+]]> = sext ir<[[LD1]]> to i64
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[SEXT]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -79,11 +79,11 @@ define void @vp_zext(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-CAST ir<[[ZEXT:%.+]]> = zext ir<[[LD1]]> to i64
+; IF-EVL-NEXT: EMIT ir<[[ZEXT:%.+]]> = zext ir<[[LD1]]> to i64
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[ZEXT]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -130,11 +130,11 @@ define void @vp_trunc(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-CAST ir<[[TRUNC:%.+]]> = trunc ir<[[LD1]]> to i16
+; IF-EVL-NEXT: EMIT ir<[[TRUNC:%.+]]> = trunc ir<[[LD1]]> to i16
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[TRUNC]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -181,11 +181,11 @@ define void @vp_fpext(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-CAST ir<[[FPEXT:%.+]]> = fpext ir<[[LD1]]> to double
+; IF-EVL-NEXT: EMIT ir<[[FPEXT:%.+]]> = fpext ir<[[LD1]]> to double
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPEXT]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -232,11 +232,11 @@ define void @vp_fptrunc(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-CAST ir<[[FPTRUNC:%.+]]> = fptrunc ir<[[LD1]]> to float
+; IF-EVL-NEXT: EMIT ir<[[FPTRUNC:%.+]]> = fptrunc ir<[[LD1]]> to float
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPTRUNC]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -283,11 +283,11 @@ define void @vp_sitofp(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-CAST ir<[[SITOFP:%.+]]> = sitofp ir<[[LD1]]> to float
+; IF-EVL-NEXT: EMIT ir<[[SITOFP:%.+]]> = sitofp ir<[[LD1]]> to float
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[SITOFP]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -334,11 +334,11 @@ define void @vp_uitofp(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-CAST ir<[[UITOFP:%.+]]> = uitofp ir<[[LD1]]> to float
+; IF-EVL-NEXT: EMIT ir<[[UITOFP:%.+]]> = uitofp ir<[[LD1]]> to float
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[UITOFP]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -385,11 +385,11 @@ define void @vp_fptosi(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-CAST ir<[[FPTOSI:%.+]]> = fptosi ir<[[LD1]]> to i32
+; IF-EVL-NEXT: EMIT ir<[[FPTOSI:%.+]]> = fptosi ir<[[LD1]]> to i32
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPTOSI]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -436,11 +436,11 @@ define void @vp_fptoui(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-CAST ir<[[FPTOUI:%.+]]> = fptoui ir<[[LD1]]> to i32
+; IF-EVL-NEXT: EMIT ir<[[FPTOUI:%.+]]> = fptoui ir<[[LD1]]> to i32
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPTOUI]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -487,11 +487,11 @@ define void @vp_inttoptr(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-CAST ir<[[INTTOPTR:%.+]]> = inttoptr ir<[[LD1]]> to ptr
+; IF-EVL-NEXT: EMIT ir<[[INTTOPTR:%.+]]> = inttoptr ir<[[LD1]]> to ptr
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[INTTOPTR]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll
index 0bcfe13832ae7..75f2499f935d4 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll
@@ -19,7 +19,7 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; IF-EVL-NEXT: Successor(s): vector.ph
; IF-EVL-EMPTY:
; IF-EVL: vector.ph:
-; IF-EVL-NEXT: SCALAR-CAST vp<[[VF32:%[0-9]+]]> = trunc vp<[[VF]]> to i32
+; IF-EVL-NEXT: EMIT vp<[[VF32:%[0-9]+]]> = trunc vp<[[VF]]> to i32
; IF-EVL-NEXT: Successor(s): vector loop
; IF-EVL-EMPTY:
; IF-EVL: vector loop: {
@@ -39,7 +39,7 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds nuw ir<%B>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[ADD]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
index 3594b3f047363..04eff34854806 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
@@ -51,7 +51,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
; IF-EVL-OUTLOOP-NEXT: WIDEN ir<[[ADD:%.+]]> = add ir<[[LD1]]>, ir<[[RDX_PHI]]>
; IF-EVL-OUTLOOP-NEXT: WIDEN-INTRINSIC vp<[[RDX_SELECT]]> = call llvm.vp.merge(ir, ir<[[ADD]]>, ir<[[RDX_PHI]]>, vp<[[EVL]]>)
-; IF-EVL-OUTLOOP-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-OUTLOOP-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -102,7 +102,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-INLOOP-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
; IF-EVL-INLOOP-NEXT: REDUCE ir<[[ADD:%.+]]> = ir<[[RDX_PHI]]> + vp.reduce.add (ir<[[LD1]]>, vp<[[EVL]]>)
-; IF-EVL-INLOOP-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-INLOOP-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-INLOOP-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll
index 4cef3f029e20f..3b5ae7811a83b 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll
@@ -39,7 +39,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[ADD]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
index 67be80393d829..adea9ca03feb2 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
@@ -50,7 +50,7 @@
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR3:%.+]]> = vector-pointer ir<[[GEP3]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[ADD]]>, vp<[[EVL]]>
- ; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+ ; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEX]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT]]> = add vp<[[IV]]>, ir<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, ir<[[VTC]]>
diff --git a/llvm/test/Transforms/LoopVectorize/X86/reduction-small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/reduction-small-size.ll
index 28f8988bd853a..eb3736183b470 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/reduction-small-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/reduction-small-size.ll
@@ -38,16 +38,16 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; CHECK: Cost of 0 for VF 2: CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<%4>
; CHECK: Cost of 0 for VF 2: vp<%5> = vector-pointer ir<%arrayidx>
; CHECK: Cost of 1 for VF 2: WIDEN ir<%0> = load vp<%5>
-; CHECK: Cost of 0 for VF 2: WIDEN-CAST ir<%conv> = zext ir<%0> to i32
+; CHECK: Cost of 0 for VF 2: EMIT ir<%conv> = zext ir<%0> to i32
; CHECK: Cost of 0 for VF 2: CLONE ir<%arrayidx2> = getelementptr inbounds ir<%b>, vp<%4>
; CHECK: Cost of 0 for VF 2: vp<%6> = vector-pointer ir<%arrayidx2>
; CHECK: Cost of 1 for VF 2: WIDEN ir<%1> = load vp<%6>
-; CHECK: Cost of 0 for VF 2: WIDEN-CAST ir<%conv3> = zext ir<%1> to i32
+; CHECK: Cost of 0 for VF 2: EMIT ir<%conv3> = zext ir<%1> to i32
; CHECK: Cost of 0 for VF 2: WIDEN ir<%conv4> = and ir<%sum.013>, ir<255>
; CHECK: Cost of 1 for VF 2: WIDEN ir<%add> = add ir<%conv>, ir<%conv4>
; CHECK: Cost of 1 for VF 2: WIDEN ir<%add5> = add ir<%add>, ir<%conv3>
-; CHECK: Cost of 0 for VF 2: WIDEN-CAST vp<%7> = trunc ir<%add5> to i8
-; CHECK: Cost of 0 for VF 2: WIDEN-CAST vp<%8> = zext vp<%7> to i32
+; CHECK: Cost of 0 for VF 2: EMIT vp<%7> = trunc ir<%add5> to i8
+; CHECK: Cost of 0 for VF 2: EMIT vp<%8> = zext vp<%7> to i32
; CHECK: Cost of 0 for VF 2: EMIT vp<%index.next> = add nuw vp<%3>, vp<%0>
; CHECK: Cost of 0 for VF 2: EMIT branch-on-count vp<%index.next>, vp<%1>
;
diff --git a/llvm/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll
index 8661d86f554b8..129e38556a66a 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll
@@ -6,8 +6,8 @@ target triple = "x86_64-apple-macosx10.8.0"
; CHECK: cost of 4 for VF 1 For instruction: %conv = uitofp i64 %tmp to double
-; CHECK: Cost of 5 for VF 2: WIDEN-CAST ir<%conv> = uitofp ir<%tmp> to double
-; CHECK: Cost of 10 for VF 4: WIDEN-CAST ir<%conv> = uitofp ir<%tmp> to double
+; CHECK: Cost of 5 for VF 2: EMIT ir<%conv> = uitofp ir<%tmp> to double
+; CHECK: Cost of 10 for VF 4: EMIT ir<%conv> = uitofp ir<%tmp> to double
define void @uint64_to_double_cost(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) nounwind {
entry:
br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll
index 32d32a64049ac..575edd1ebc2ae 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll
@@ -188,9 +188,9 @@ define i32 @test_chained_first_order_recurrences_4(ptr %base, i64 %x) {
; CHECK-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%base>, vp<[[SCALAR_STEPS]]>
; CHECK-NEXT: EMIT vp<[[SPLICE_X:%.]]> = first-order splice ir<%for.x>, ir<%for.x.next>
-; CHECK-NEXT: WIDEN-CAST ir<%for.x.prev> = trunc vp<[[SPLICE_X]]> to i32
+; CHECK-NEXT: EMIT ir<%for.x.prev> = trunc vp<[[SPLICE_X]]> to i32
; CHECK-NEXT: EMIT vp<[[SPLICE_Y:%.+]]> = first-order splice ir<%for.y>, ir<%for.x.prev>
-; CHECK-NEXT: WIDEN-CAST ir<%for.y.i64> = sext vp<[[SPLICE_Y]]> to i64
+; CHECK-NEXT: EMIT ir<%for.y.i64> = sext vp<[[SPLICE_Y]]> to i64
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%for.y.i64>
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
@@ -266,9 +266,9 @@ define i32 @test_chained_first_order_recurrences_5_hoist_to_load(ptr %base) {
; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: WIDEN ir<%for.x.next> = mul ir<%l>, ir<2>
; CHECK-NEXT: EMIT vp<[[SPLICE_X:%.]]> = first-order splice ir<%for.x>, ir<%for.x.next>
-; CHECK-NEXT: WIDEN-CAST ir<%for.x.prev> = trunc vp<[[SPLICE_X]]> to i32
+; CHECK-NEXT: EMIT ir<%for.x.prev> = trunc vp<[[SPLICE_X]]> to i32
; CHECK-NEXT: EMIT vp<[[SPLICE_Y:%.+]]> = first-order splice ir<%for.y>, ir<%for.x.prev>
-; CHECK-NEXT: WIDEN-CAST ir<%for.y.i64> = sext vp<[[SPLICE_Y]]> to i64
+; CHECK-NEXT: EMIT ir<%for.y.i64> = sext vp<[[SPLICE_Y]]> to i64
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%for.y.i64>
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
index 0b2e7fe484390..efcf69cd319d9 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
@@ -47,7 +47,7 @@ define void @sink_replicate_region_1(i32 %x, ptr %ptr, ptr noalias %dst) optsize
; CHECK-NEXT: Successor(s): loop.0
; CHECK-EMPTY:
; CHECK-NEXT: loop.0:
-; CHECK-NEXT: WIDEN-CAST ir<%conv> = sext vp<[[PRED1]]> to i32
+; CHECK-NEXT: EMIT ir<%conv> = sext vp<[[PRED1]]> to i32
; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%0>, ir<%conv>
; CHECK-NEXT: Successor(s): pred.store
; CHECK-EMPTY:
@@ -129,7 +129,7 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize {
; CHECK-NEXT: Successor(s): vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
-; CHECK-NEXT: WIDEN-CAST ir<%recur.next> = sext ir<%y> to i32
+; CHECK-NEXT: EMIT ir<%recur.next> = sext ir<%y> to i32
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: vector loop: {
@@ -217,7 +217,7 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize {
; CHECK-NEXT: Successor(s): vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
-; CHECK-NEXT: WIDEN-CAST ir<%recur.next> = sext ir<%y> to i32
+; CHECK-NEXT: EMIT ir<%recur.next> = sext ir<%y> to i32
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: vector loop: {
@@ -326,7 +326,7 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr
; CHECK-NEXT: Successor(s): loop.0
; CHECK-EMPTY:
; CHECK-NEXT: loop.0:
-; CHECK-NEXT: WIDEN-CAST ir<%conv> = sext vp<[[PRED]]> to i32
+; CHECK-NEXT: EMIT ir<%conv> = sext vp<[[PRED]]> to i32
; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%0>, ir<%conv>
; CHECK-NEXT: Successor(s): pred.store
; CHECK-EMPTY:
@@ -416,7 +416,7 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias
; CHECK-NEXT: Successor(s): vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
-; CHECK-NEXT: WIDEN-CAST ir<%recur.next> = sext ir<%y> to i32
+; CHECK-NEXT: EMIT ir<%recur.next> = sext ir<%y> to i32
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: vector loop: {
diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
index c3164762e8130..e98c3fd6b593d 100644
--- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
@@ -202,14 +202,14 @@ exit:
; DBG-NEXT: Successor(s): vector.ph
; DBG-EMPTY:
; DBG-NEXT: vector.ph:
-; DBG-NEXT: SCALAR-CAST vp<[[CAST:%.+]]> = trunc ir<1> to i32
+; DBG-NEXT: EMIT vp<[[CAST:%.+]]> = trunc ir<1> to i32
; DBG-NEXT: Successor(s): vector loop
; DBG-EMPTY:
; DBG-NEXT: vector loop: {
; DBG-NEXT: vector.body:
; DBG-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; DBG-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for> = phi ir<0>, vp<[[SCALAR_STEPS:.+]]>
-; DBG-NEXT: SCALAR-CAST vp<[[TRUNC_IV:%.+]]> = trunc vp<[[CAN_IV]]> to i32
+; DBG-NEXT: EMIT vp<[[TRUNC_IV:%.+]]> = trunc vp<[[CAN_IV]]> to i32
; DBG-NEXT: vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[TRUNC_IV]]>, vp<[[CAST]]>
; DBG-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%for>, vp<[[SCALAR_STEPS]]>
; DBG-NEXT: CLONE store vp<[[SPLICE]]>, ir<%dst>
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
index 00d8de67a3b40..39baf69406b84 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
@@ -1104,7 +1104,7 @@ define void @zext_nneg(ptr noalias %p, ptr noalias %p1) {
; CHECK-NEXT: CLONE ir<%idx> = getelementptr ir<%p>, vp<[[STEPS]]>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%idx>
; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]>
-; CHECK-NEXT: WIDEN-CAST ir<%zext> = zext nneg ir<%l>
+; CHECK-NEXT: EMIT ir<%zext> = zext nneg ir<%l>
; CHECK-NEXT: REPLICATE store ir<%zext>, ir<%p1>
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>