Skip to content

Commit 177de6d

Browse files
committed
[LV][EVL] Support call instruction with EVL-vectorization
1 parent fcf02bc commit 177de6d

File tree

14 files changed

+115
-31
lines changed

14 files changed

+115
-31
lines changed

llvm/include/llvm/Analysis/VectorUtils.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,12 @@ bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx);
160160
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
161161
const TargetLibraryInfo *TLI);
162162

163+
/// Returns VP intrinsic ID for call.
164+
/// For the input call instruction, it finds the matching VP intrinsic and returns
165+
/// its intrinsic ID; if no such intrinsic is found, it returns not_intrinsic.
166+
Intrinsic::ID getVPIntrinsicIDForCall(const CallInst *CI,
167+
const TargetLibraryInfo *TLI);
168+
163169
/// Given a vector and an element number, see if the scalar value is
164170
/// already around as a register, for example if it were inserted then extracted
165171
/// from the vector.

llvm/include/llvm/IR/VectorBuilder.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,11 +99,11 @@ class VectorBuilder {
9999
const Twine &Name = Twine());
100100

101101
/// Emit the VP intrinsic call that corresponds to the given intrinsic.
102-
/// \param RdxID The intrinsic ID of llvm.vector.reduce.*
102+
/// \param RdxID The ID of the intrinsic whose VP counterpart is emitted.
103103
/// \param ValTy The type of operand which the reduction operation is
104104
/// performed.
105105
/// \param VecOpArray The operand list.
106-
Value *createSimpleReduction(Intrinsic::ID RdxID, Type *ValTy,
106+
Value *createSimpleIntrinsic(Intrinsic::ID RdxID, Type *ValTy,
107107
ArrayRef<Value *> VecOpArray,
108108
const Twine &Name = Twine());
109109
};

llvm/lib/Analysis/VectorUtils.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,13 @@ Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
169169
return Intrinsic::not_intrinsic;
170170
}
171171

172+
Intrinsic::ID llvm::getVPIntrinsicIDForCall(const CallInst *CI,
173+
const TargetLibraryInfo *TLI) {
174+
Intrinsic::ID ID = getIntrinsicForCallSite(*CI, TLI);
175+
176+
return VPIntrinsic::getForIntrinsic(ID);
177+
}
178+
172179
/// Given a vector and an element number, see if the scalar value is
173180
/// already around as a register, for example if it were inserted then extracted
174181
/// from the vector.

llvm/lib/IR/VectorBuilder.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,13 +60,12 @@ Value *VectorBuilder::createVectorInstruction(unsigned Opcode, Type *ReturnTy,
6060
return createVectorInstructionImpl(VPID, ReturnTy, InstOpArray, Name);
6161
}
6262

63-
Value *VectorBuilder::createSimpleReduction(Intrinsic::ID RdxID,
64-
Type *ValTy,
63+
Value *VectorBuilder::createSimpleIntrinsic(Intrinsic::ID ID, Type *ValTy,
6564
ArrayRef<Value *> InstOpArray,
6665
const Twine &Name) {
67-
auto VPID = VPIntrinsic::getForIntrinsic(RdxID);
68-
assert(VPReductionIntrinsic::isVPReduction(VPID) &&
69-
"No VPIntrinsic for this reduction");
66+
auto VPID = VPIntrinsic::getForIntrinsic(ID);
67+
assert(VPIntrinsic::isVPIntrinsic(VPID) &&
68+
"No VPIntrinsic for this Intrinsic");
7069
return createVectorInstructionImpl(VPID, ValTy, InstOpArray, Name);
7170
}
7271

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1075,6 +1075,14 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
10751075
return getArithmeticInstrCost(*FOp, ICA.getReturnType(), CostKind);
10761076
break;
10771077
}
1078+
// TODO: This needs to be pushed as a separate patch.
1079+
case Intrinsic::vp_smax:
1080+
case Intrinsic::vp_smin:
1081+
case Intrinsic::vp_umax:
1082+
case Intrinsic::vp_umin: {
1083+
// return LT.first;
1084+
return 1;
1085+
}
10781086
// vp int cast ops.
10791087
case Intrinsic::vp_trunc:
10801088
case Intrinsic::vp_zext:

llvm/lib/Transforms/Utils/LoopUtils.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1300,7 +1300,7 @@ Value *llvm::createSimpleReduction(VectorBuilder &VBuilder, Value *Src,
13001300
Type *SrcEltTy = SrcTy->getElementType();
13011301
Value *Iden = getRecurrenceIdentity(Kind, SrcEltTy, Desc.getFastMathFlags());
13021302
Value *Ops[] = {Iden, Src};
1303-
return VBuilder.createSimpleReduction(Id, SrcTy, Ops);
1303+
return VBuilder.createSimpleIntrinsic(Id, SrcTy, Ops);
13041304
}
13051305

13061306
Value *llvm::createReduction(IRBuilderBase &B,
@@ -1343,7 +1343,7 @@ Value *llvm::createOrderedReduction(VectorBuilder &VBuilder,
13431343
Intrinsic::ID Id = getReductionIntrinsicID(RecurKind::FAdd);
13441344
auto *SrcTy = cast<VectorType>(Src->getType());
13451345
Value *Ops[] = {Start, Src};
1346-
return VBuilder.createSimpleReduction(Id, SrcTy, Ops);
1346+
return VBuilder.createSimpleIntrinsic(Id, SrcTy, Ops);
13471347
}
13481348

13491349
void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue,

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8349,7 +8349,6 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
83498349
return nullptr;
83508350

83518351
SmallVector<VPValue *, 4> Ops(Operands.take_front(CI->arg_size()));
8352-
83538352
// Is it beneficial to perform intrinsic call compared to lib call?
83548353
bool ShouldUseVectorIntrinsic =
83558354
ID && LoopVectorizationPlanner::getDecisionAndClampRange(
@@ -8690,7 +8689,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
86908689
// TODO: try to put it close to addActiveLaneMask().
86918690
// Discard the plan if it is not EVL-compatible
86928691
if (CM.foldTailWithEVL() &&
8693-
!VPlanTransforms::tryAddExplicitVectorLength(*Plan))
8692+
!VPlanTransforms::tryAddExplicitVectorLength(*Plan, *TLI))
86948693
break;
86958694
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
86968695
VPlans.push_back(std::move(Plan));

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1678,9 +1678,24 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {
16781678
/// Returns true if the intrinsic may write to memory.
16791679
bool mayWriteToMemory() const { return MayWriteToMemory; }
16801680

1681+
operand_range arg_operands() {
1682+
unsigned argNum = VPIntrinsic::isVPIntrinsic(VectorIntrinsicID)
1683+
? getNumOperands() - 1
1684+
: getNumOperands();
1685+
return make_range(op_begin(), op_begin() + argNum);
1686+
}
1687+
1688+
const_operand_range arg_operands() const {
1689+
unsigned argNum = VPIntrinsic::isVPIntrinsic(VectorIntrinsicID)
1690+
? getNumOperands() - 1
1691+
: getNumOperands();
1692+
return make_range(op_begin(), op_begin() + argNum);
1693+
}
1694+
16811695
/// Returns true if the intrinsic may have side-effects.
16821696
bool mayHaveSideEffects() const { return MayHaveSideEffects; }
16831697

1698+
bool onlyFirstLaneUsed(const VPValue *Op) const override;
16841699
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
16851700
/// Print the recipe.
16861701
void print(raw_ostream &O, const Twine &Indent,

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
6161
case Instruction::ICmp:
6262
case VPInstruction::ActiveLaneMask:
6363
return inferScalarType(R->getOperand(1));
64+
case VPInstruction::ExplicitVectorLength:
65+
return Type::getIntNTy(Ctx, 32);
6466
case VPInstruction::FirstOrderRecurrenceSplice:
6567
case VPInstruction::Not:
6668
return SetResultTyFromOp();

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 40 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -962,17 +962,21 @@ void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
962962
void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
963963
assert(State.VF.isVector() && "not widening");
964964
State.setDebugLocFrom(getDebugLoc());
965-
965+
Intrinsic::ID FuncID =
966+
VPIntrinsic::isVPIntrinsic(VectorIntrinsicID)
967+
? VPIntrinsic::getFunctionalIntrinsicIDForVP(VectorIntrinsicID)
968+
.value()
969+
: VectorIntrinsicID;
966970
SmallVector<Type *, 2> TysForDecl;
967971
// Add return type if intrinsic is overloaded on it.
968-
if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1))
972+
if (isVectorIntrinsicWithOverloadTypeAtArg(FuncID, -1))
969973
TysForDecl.push_back(VectorType::get(getResultType(), State.VF));
970974
SmallVector<Value *, 4> Args;
971-
for (const auto &I : enumerate(operands())) {
975+
for (const auto &I : enumerate(arg_operands())) {
972976
// Some intrinsics have a scalar argument - don't replace it with a
973977
// vector.
974978
Value *Arg;
975-
if (isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
979+
if (isVectorIntrinsicWithScalarOpAtArg(FuncID, I.index()))
976980
Arg = State.get(I.value(), VPLane(0));
977981
else
978982
Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));
@@ -981,18 +985,34 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
981985
Args.push_back(Arg);
982986
}
983987

984-
// Use vector version of the intrinsic.
985-
Module *M = State.Builder.GetInsertBlock()->getModule();
986-
Function *VectorF =
987-
Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
988-
assert(VectorF && "Can't retrieve vector intrinsic.");
989-
988+
CallInst *V = nullptr;
990989
auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
991990
SmallVector<OperandBundleDef, 1> OpBundles;
992991
if (CI)
993992
CI->getOperandBundlesAsDefs(OpBundles);
994993

995-
CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
994+
if (VPIntrinsic::isVPIntrinsic(VectorIntrinsicID)) {
995+
// Use the vector-predicated (VP) version of the intrinsic.
996+
IRBuilderBase &BuilderIR = State.Builder;
997+
VectorBuilder VBuilder(BuilderIR);
998+
Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
999+
// VPValue EVL = getOperand(getNumOperands() - 1);
1000+
VBuilder.setMask(Mask).setEVL(
1001+
State.get(getOperand(getNumOperands() - 1), /*NeedsScalar=*/true));
1002+
auto *TyReturn = VectorType::get(getResultType(), State.VF);
1003+
Value *VPInst = VBuilder.createSimpleIntrinsic(VectorIntrinsicID, TyReturn,
1004+
Args, "vp.call");
1005+
if (VPInst) {
1006+
V = cast<CallInst>(VPInst);
1007+
}
1008+
} else {
1009+
// Use vector version of the intrinsic.
1010+
Module *M = State.Builder.GetInsertBlock()->getModule();
1011+
Function *VectorF =
1012+
Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
1013+
assert(VectorF && "Can't retrieve vector intrinsic.");
1014+
V = State.Builder.CreateCall(VectorF, Args, OpBundles);
1015+
}
9961016

9971017
setFlags(V);
9981018

@@ -1011,7 +1031,7 @@ InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
10111031
// clear Arguments.
10121032
// TODO: Rework TTI interface to be independent of concrete IR values.
10131033
SmallVector<const Value *> Arguments;
1014-
for (const auto &[Idx, Op] : enumerate(operands())) {
1034+
for (const auto &[Idx, Op] : enumerate(arg_operands())) {
10151035
auto *V = Op->getUnderlyingValue();
10161036
if (!V) {
10171037
if (auto *UI = dyn_cast_or_null<CallBase>(getUnderlyingValue())) {
@@ -1042,6 +1062,14 @@ StringRef VPWidenIntrinsicRecipe::getIntrinsicName() const {
10421062
return Intrinsic::getBaseName(VectorIntrinsicID);
10431063
}
10441064

1065+
bool VPWidenIntrinsicRecipe::onlyFirstLaneUsed(const VPValue *Op) const {
1066+
assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
1067+
// Vector predication intrinsics only demand the first lane of the last
1068+
// operand (the EVL operand).
1069+
return VPIntrinsic::isVPIntrinsic(VectorIntrinsicID) &&
1070+
Op == getOperand(getNumOperands() - 1);
1071+
}
1072+
10451073
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
10461074
void VPWidenIntrinsicRecipe::print(raw_ostream &O, const Twine &Indent,
10471075
VPSlotTracker &SlotTracker) const {

0 commit comments

Comments
 (0)