-
Notifications
You must be signed in to change notification settings - Fork 14.6k
[VPlan] Add new VPScalarCastRecipe, use for IV & step trunc. #78113
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
36b085f
f5f961e
6b3e52e
9331a45
9988d78
fe5ec9e
5500bdb
18cd7d4
f1f1eff
d382232
6efa577
e756798
3c31111
dc7990d
adc5441
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -859,6 +859,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue { | |
case VPRecipeBase::VPWidenIntOrFpInductionSC: | ||
case VPRecipeBase::VPWidenPointerInductionSC: | ||
case VPRecipeBase::VPReductionPHISC: | ||
case VPRecipeBase::VPScalarCastSC: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit (independent of this patch): would be good to keep in order. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep, will do separately. |
||
return true; | ||
case VPRecipeBase::VPInterleaveSC: | ||
case VPRecipeBase::VPBranchOnMaskSC: | ||
|
@@ -1338,6 +1339,32 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags { | |
Type *getResultType() const { return ResultTy; } | ||
}; | ||
|
||
/// VPScalarCastRecipe is a recipe to create scalar cast instructions. | ||
class VPScalarCastRecipe : public VPSingleDefRecipe { | ||
Instruction::CastOps Opcode; | ||
|
||
/// Result type for the cast. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: comment adds little knowledge if any. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Removed, thanks! |
||
Type *ResultTy; | ||
|
||
Value *generate(VPTransformState &State, unsigned Part); | ||
|
||
public: | ||
VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy) | ||
: VPSingleDefRecipe(VPDef::VPScalarCastSC, {Op}), Opcode(Opcode), | ||
ResultTy(ResultTy) {} | ||
|
||
~VPScalarCastRecipe() override = default; | ||
|
||
VP_CLASSOF_IMPL(VPDef::VPScalarCastSC) | ||
|
||
void execute(VPTransformState &State) override; | ||
|
||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||
void print(raw_ostream &O, const Twine &Indent, | ||
VPSlotTracker &SlotTracker) const override; | ||
#endif | ||
}; | ||
|
||
/// A recipe for widening Call instructions. | ||
class VPWidenCallRecipe : public VPSingleDefRecipe { | ||
/// ID of the vector intrinsic to call when widening the call. If set the | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -230,7 +230,11 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) { | |
return V->getUnderlyingValue()->getType(); | ||
}) | ||
.Case<VPWidenCastRecipe>( | ||
[](const VPWidenCastRecipe *R) { return R->getResultType(); }); | ||
[](const VPWidenCastRecipe *R) { return R->getResultType(); }) | ||
.Case<VPExpandSCEVRecipe>([](const VPExpandSCEVRecipe *R) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Adding Expand SCEV case to inferScalarType is independent of this patch? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is needed to infer the type of the step in the new code in VPlanTransforms.cpp ( There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So now that VPDerivedIVRecipe simply mirrors the type of its first start-value operand, should it join VPCanonicalIVPHIRecipe et al.'s or VPPredInstPHIRecipe et al.'s cases above and remove its getScalarType() method? Possibly as a follow-up. Should VPWidenIntOrFpInductionRecipe's trunc also utilize VPScalarCastRecipe, and retire its getScalarType(), possibly as a follow-up? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. VPDerivedIVRecipe getScalarType would be good to remove, will do as follow-up.
|
||
return R->getSCEV()->getType(); | ||
}); | ||
|
||
assert(ResultTy && "could not infer type for the given VPValue"); | ||
CachedTypes[V] = ResultTy; | ||
return ResultTy; | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -117,6 +117,7 @@ bool VPRecipeBase::mayHaveSideEffects() const { | |||||
switch (getVPDefID()) { | ||||||
case VPDerivedIVSC: | ||||||
case VPPredInstPHISC: | ||||||
case VPScalarCastSC: | ||||||
return false; | ||||||
case VPInstructionSC: | ||||||
switch (cast<VPInstruction>(this)->getOpcode()) { | ||||||
|
@@ -1117,13 +1118,7 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) { | |||||
|
||||||
// Ensure step has the same type as that of scalar IV. | ||||||
Type *BaseIVTy = BaseIV->getType()->getScalarType(); | ||||||
if (BaseIVTy != Step->getType()) { | ||||||
// TODO: Also use VPDerivedIVRecipe when only the step needs truncating, to | ||||||
// avoid separate truncate here. | ||||||
assert(Step->getType()->isIntegerTy() && | ||||||
"Truncation requires an integer step"); | ||||||
Step = State.Builder.CreateTrunc(Step, BaseIVTy); | ||||||
} | ||||||
assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!"); | ||||||
|
||||||
// We build scalar steps for both integer and floating-point induction | ||||||
// variables. Here, we determine the kind of arithmetic we will perform. | ||||||
|
@@ -1469,6 +1464,47 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent, | |||||
} | ||||||
#endif | ||||||
|
||||||
Value *VPScalarCastRecipe ::generate(VPTransformState &State, unsigned Part) { | ||||||
assert(vputils::onlyFirstLaneUsed(this) && | ||||||
"Codegen only implemented for first lane."); | ||||||
switch (Opcode) { | ||||||
case Instruction::SExt: | ||||||
case Instruction::ZExt: | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Only Trunc is currently being used, still. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep, could remove if preferred, but There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would be good to either remove or make a note that SExt and ZExt are currently unused/dead cases. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added a note, thanks! |
||||||
case Instruction::Trunc: { | ||||||
Value *Op = State.get(getOperand(0), VPIteration(Part, 0)); | ||||||
return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy); | ||||||
} | ||||||
default: | ||||||
llvm_unreachable("opcode not implemented yet"); | ||||||
} | ||||||
} | ||||||
|
||||||
void VPScalarCastRecipe ::execute(VPTransformState &State) { | ||||||
bool UniformAcrossUFs = all_of(operands(), [](VPValue *Op) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
this checks more than uniformity across UFs. And if it is invariance we're checking, we can alternatively check if the recipe itself resides outside vector regions (e.g., has been LICM'd), rather than all of its operands. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The latest version extends the patch to use it also to truncate VPDerivedIVRecipe, if needed. For that, it also checks if the operands are VPDerivedIV/VPCanonicalIVPHIs, which should be considered uniform across all VFs & UFs, unless I am missing something. |
||||||
return Op->isDefinedOutsideVectorRegions(); | ||||||
}); | ||||||
for (unsigned Part = 0; Part != State.UF; ++Part) { | ||||||
Value *Res; | ||||||
// Only generate a single instance, if the recipe is uniform across all UFs. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Only a single instance is going to be generated, in the preheader? Recipe is uniform across all UFs by (current) definition (and even more so - invariant). Can assert UniformAcrossUFs holds, along with onlyFirstLaneUsed. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The implementation here is intentionally kept generic for any cast where only the first lane is used, so it can be used directly in #76172 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is currently untested / dead code. The value returned by an EVL recipe should arguably be of the same type as that of its first operand, even if the underlying intrinsic it currently wraps insists on always returning an i32? I.e., that case should arguably be fixed by performing a trunc/sext/zext as needed when providing the adequate EVL value given a current Phi/remaining-trip-count values, to facilitate adding/subtracting them together. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated version of the patch now also uses it for truncating derived IV/canonical IV if needed. Those are still uniform across VFs & UFs, but not invariant. I suppose EVL at the moment can also be considered uniform across VFs and UFs. Both the if/else sides should be executed by tests. |
||||||
if (Part > 0 && UniformAcrossUFs) | ||||||
Res = State.get(this, VPIteration(0, 0)); | ||||||
else | ||||||
Res = generate(State, Part); | ||||||
State.set(this, Res, VPIteration(Part, 0)); | ||||||
} | ||||||
} | ||||||
|
||||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||||||
void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent, | ||||||
VPSlotTracker &SlotTracker) const { | ||||||
O << Indent << "SCALAR-CAST "; | ||||||
printAsOperand(O, SlotTracker); | ||||||
O << " = " << Instruction::getOpcodeName(Opcode) << " "; | ||||||
printOperands(O, SlotTracker); | ||||||
O << " to " << *ResultTy; | ||||||
} | ||||||
#endif | ||||||
|
||||||
void VPBranchOnMaskRecipe::execute(VPTransformState &State) { | ||||||
assert(State.Instance && "Branch on Mask works only on single instance."); | ||||||
|
||||||
|
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -504,6 +504,15 @@ static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID, | |||||||||
HeaderVPBB->insert(BaseIV->getDefiningRecipe(), IP); | ||||||||||
} | ||||||||||
|
||||||||||
VPTypeAnalysis TypeInfo(SE.getContext()); | ||||||||||
if (TypeInfo.inferScalarType(BaseIV) != TypeInfo.inferScalarType(Step)) { | ||||||||||
Step = new VPScalarCastRecipe(Instruction::Trunc, Step, | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this recipe only generate Trunc? No need to hold an Opcode, can be renamed VPScalarTruncRecipe. Assert that the type sizes correspond to Trunc? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, here the only use-case is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Assert that the type sizes correspond to Trunc? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added, thanks! |
||||||||||
TypeInfo.inferScalarType(BaseIV)); | ||||||||||
auto *VecPreheader = | ||||||||||
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSinglePredecessor()); | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done, thanks! |
||||||||||
VecPreheader->appendRecipe(Step->getDefiningRecipe()); | ||||||||||
} | ||||||||||
|
||||||||||
VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(ID, BaseIV, Step); | ||||||||||
HeaderVPBB->insert(Steps, IP); | ||||||||||
return Steps; | ||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: error message missing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This was left over from testing, removed!