Skip to content

Commit 35d8632

Browse files
committed
[LV][EVL] Support call instruction with EVL-vectorization
1 parent 2611132 commit 35d8632

File tree

11 files changed

+135
-75
lines changed

11 files changed

+135
-75
lines changed

llvm/include/llvm/Analysis/VectorUtils.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,11 @@ bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx);
160160
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
161161
const TargetLibraryInfo *TLI);
162162

163+
/// Returns VP intrinsic ID for call.
164+
/// For the input call instruction, finds the corresponding VP intrinsic and returns
165+
/// its intrinsic ID; if no mapping is found, it returns not_intrinsic.
166+
Intrinsic::ID getVPIntrinsicIDForCall(const CallInst *CI);
167+
163168
/// Given a vector and an element number, see if the scalar value is
164169
/// already around as a register, for example if it were inserted then extracted
165170
/// from the vector.

llvm/include/llvm/IR/VectorBuilder.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,11 +99,11 @@ class VectorBuilder {
9999
const Twine &Name = Twine());
100100

101101
/// Emit the VP intrinsic call that corresponds to the given intrinsic ID.
102-
/// \param RdxID The intrinsic ID of llvm.vector.reduce.*
102+
/// \param ID The ID of the intrinsic whose VP counterpart is emitted.
103103
/// \param ValTy The type of operand which the reduction operation is
104104
/// performed.
105105
/// \param VecOpArray The operand list.
106-
Value *createSimpleReduction(Intrinsic::ID RdxID, Type *ValTy,
106+
Value *createSimpleIntrinsic(Intrinsic::ID ID, Type *ValTy,
                             ArrayRef<Value *> VecOpArray,
                             const Twine &Name = Twine());
109109
};

llvm/lib/Analysis/VectorUtils.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,15 @@ Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
169169
return Intrinsic::not_intrinsic;
170170
}
171171

172+
/// Returns the ID of the VP intrinsic that corresponds to the callee of \p CI.
///
/// \param CI The call instruction to inspect.
/// \returns The result of VPIntrinsic::getForIntrinsic() for the callee's
/// intrinsic ID, or Intrinsic::not_intrinsic when the call has no directly
/// called function or the callee is not an intrinsic.
Intrinsic::ID llvm::getVPIntrinsicIDForCall(const CallInst *CI) {
  const Function *F = CI->getCalledFunction();
  // Every path must return a value: a callee that is absent or is not an
  // intrinsic has no VP counterpart to report.
  if (!F || !F->isIntrinsic())
    return Intrinsic::not_intrinsic;

  return VPIntrinsic::getForIntrinsic(F->getIntrinsicID());
}
180+
172181
/// Given a vector and an element number, see if the scalar value is
173182
/// already around as a register, for example if it were inserted then extracted
174183
/// from the vector.

llvm/lib/IR/VectorBuilder.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,13 +60,12 @@ Value *VectorBuilder::createVectorInstruction(unsigned Opcode, Type *ReturnTy,
6060
return createVectorInstructionImpl(VPID, ReturnTy, InstOpArray, Name);
6161
}
6262

63-
Value *VectorBuilder::createSimpleReduction(Intrinsic::ID RdxID,
64-
Type *ValTy,
63+
Value *VectorBuilder::createSimpleIntrinsic(Intrinsic::ID ID, Type *ValTy,
6564
ArrayRef<Value *> InstOpArray,
6665
const Twine &Name) {
67-
auto VPID = VPIntrinsic::getForIntrinsic(RdxID);
68-
assert(VPReductionIntrinsic::isVPReduction(VPID) &&
69-
"No VPIntrinsic for this reduction");
66+
auto VPID = VPIntrinsic::getForIntrinsic(ID);
67+
assert(VPIntrinsic::isVPIntrinsic(VPID) &&
68+
"No VPIntrinsic for this Intrinsic");
7069
return createVectorInstructionImpl(VPID, ValTy, InstOpArray, Name);
7170
}
7271

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1073,6 +1073,14 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
10731073
return getArithmeticInstrCost(*FOp, ICA.getReturnType(), CostKind);
10741074
break;
10751075
}
1076+
// TODO: This needs to be pushed as a separate patch.
1077+
case Intrinsic::vp_smax:
1078+
case Intrinsic::vp_smin:
1079+
case Intrinsic::vp_umax:
1080+
case Intrinsic::vp_umin: {
1081+
// return LT.first;
1082+
return 1;
1083+
}
10761084
// vp int cast ops.
10771085
case Intrinsic::vp_trunc:
10781086
case Intrinsic::vp_zext:

llvm/lib/Transforms/Utils/LoopUtils.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1300,7 +1300,7 @@ Value *llvm::createSimpleReduction(VectorBuilder &VBuilder, Value *Src,
13001300
Type *SrcEltTy = SrcTy->getElementType();
13011301
Value *Iden = getRecurrenceIdentity(Kind, SrcEltTy, Desc.getFastMathFlags());
13021302
Value *Ops[] = {Iden, Src};
1303-
return VBuilder.createSimpleReduction(Id, SrcTy, Ops);
1303+
return VBuilder.createSimpleIntrinsic(Id, SrcTy, Ops);
13041304
}
13051305

13061306
Value *llvm::createReduction(IRBuilderBase &B,
@@ -1343,7 +1343,7 @@ Value *llvm::createOrderedReduction(VectorBuilder &VBuilder,
13431343
Intrinsic::ID Id = getReductionIntrinsicID(RecurKind::FAdd);
13441344
auto *SrcTy = cast<VectorType>(Src->getType());
13451345
Value *Ops[] = {Start, Src};
1346-
return VBuilder.createSimpleReduction(Id, SrcTy, Ops);
1346+
return VBuilder.createSimpleIntrinsic(Id, SrcTy, Ops);
13471347
}
13481348

13491349
void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue,

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8351,7 +8351,6 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
83518351
return nullptr;
83528352

83538353
SmallVector<VPValue *, 4> Ops(Operands.take_front(CI->arg_size()));
8354-
83558354
// Is it beneficial to perform intrinsic call compared to lib call?
83568355
bool ShouldUseVectorIntrinsic =
83578356
ID && LoopVectorizationPlanner::getDecisionAndClampRange(

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1708,6 +1708,20 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {
17081708
/// Returns true if the intrinsic may write to memory.
17091709
bool mayWriteToMemory() const { return MayWriteToMemory; }
17101710

1711+
/// Returns the operands that are passed as arguments to the widened
/// intrinsic call. For a VP intrinsic the last operand is left out of the
/// range; for any other intrinsic every operand is included.
operand_range arg_operands() {
  unsigned NumArgs = getNumOperands();
  if (VPIntrinsic::isVPIntrinsic(VectorIntrinsicID))
    --NumArgs;
  return make_range(op_begin(), op_begin() + NumArgs);
}
1717+
1718+
/// Const overload: returns the operands that are passed as arguments to the
/// widened intrinsic call. For a VP intrinsic the last operand is left out
/// of the range; for any other intrinsic every operand is included.
const_operand_range arg_operands() const {
  const bool DropsLastOperand = VPIntrinsic::isVPIntrinsic(VectorIntrinsicID);
  const unsigned NumArgs = getNumOperands() - (DropsLastOperand ? 1 : 0);
  return make_range(op_begin(), op_begin() + NumArgs);
}
1724+
17111725
/// Returns true if the intrinsic may have side-effects.
17121726
bool mayHaveSideEffects() const { return MayHaveSideEffects; }
17131727

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -970,7 +970,7 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
970970
if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1))
971971
TysForDecl.push_back(VectorType::get(getResultType(), State.VF));
972972
SmallVector<Value *, 4> Args;
973-
for (const auto &I : enumerate(operands())) {
973+
for (const auto &I : enumerate(arg_operands())) {
974974
// Some intrinsics have a scalar argument - don't replace it with a
975975
// vector.
976976
Value *Arg;
@@ -983,18 +983,33 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
983983
Args.push_back(Arg);
984984
}
985985

986-
// Use vector version of the intrinsic.
987-
Module *M = State.Builder.GetInsertBlock()->getModule();
988-
Function *VectorF =
989-
Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
990-
assert(VectorF && "Can't retrieve vector intrinsic.");
991-
986+
CallInst *V = nullptr;
992987
auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
993988
SmallVector<OperandBundleDef, 1> OpBundles;
994989
if (CI)
995990
CI->getOperandBundlesAsDefs(OpBundles);
996991

997-
CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
992+
if (VPIntrinsic::isVPIntrinsic(VectorIntrinsicID)) {
993+
// Use the vector-predicated (VP) version of the intrinsic.
994+
IRBuilderBase &BuilderIR = State.Builder;
995+
VectorBuilder VBuilder(BuilderIR);
996+
Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
997+
VBuilder.setMask(Mask).setEVL(
998+
State.get(getOperand(getNumOperands() - 1), /*NeedsScalar=*/true));
999+
auto *TyReturn = VectorType::get(getResultType(), State.VF);
1000+
Value *VPInst = VBuilder.createSimpleIntrinsic(VectorIntrinsicID, TyReturn,
1001+
Args, "vp.call");
1002+
if (VPInst) {
1003+
V = cast<CallInst>(VPInst);
1004+
}
1005+
} else {
1006+
// Use vector version of the intrinsic.
1007+
Module *M = State.Builder.GetInsertBlock()->getModule();
1008+
Function *VectorF =
1009+
Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
1010+
assert(VectorF && "Can't retrieve vector intrinsic.");
1011+
V = State.Builder.CreateCall(VectorF, Args, OpBundles);
1012+
}
9981013

9991014
setFlags(V);
10001015

@@ -1013,7 +1028,7 @@ InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
10131028
// clear Arguments.
10141029
// TODO: Rework TTI interface to be independent of concrete IR values.
10151030
SmallVector<const Value *> Arguments;
1016-
for (const auto &[Idx, Op] : enumerate(operands())) {
1031+
for (const auto &[Idx, Op] : enumerate(arg_operands())) {
10171032
auto *V = Op->getUnderlyingValue();
10181033
if (!V) {
10191034
if (auto *UI = dyn_cast_or_null<CallBase>(getUnderlyingValue())) {

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1381,6 +1381,17 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
13811381
return nullptr;
13821382
return new VPWidenEVLRecipe(*W, EVL);
13831383
})
1384+
.Case<VPWidenIntrinsicRecipe>(
1385+
[&](VPWidenIntrinsicRecipe *CInst) -> VPRecipeBase * {
1386+
auto *CI = cast<CallInst>(CInst->getUnderlyingInstr());
1387+
SmallVector<VPValue *> Ops(CInst->operands());
1388+
Ops.push_back(&EVL);
1389+
Intrinsic::ID VPID = getVPIntrinsicIDForCall(CI);
1390+
if (VPID == Intrinsic::not_intrinsic)
1391+
return nullptr;
1392+
return new VPWidenIntrinsicRecipe(
1393+
*CI, VPID, Ops, CI->getType(), CI->getDebugLoc());
1394+
})
13841395
.Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
13851396
VPValue *NewMask = GetNewMask(Red->getCondOp());
13861397
return new VPReductionEVLRecipe(*Red, EVL, NewMask);

0 commit comments

Comments
 (0)