Skip to content

Commit 2df7409

Browse files
committed
VPlan: implement VPlan-level constant-folding
Introduce VPConstantFolder (defined in VPlanConstantFolder.h), a variation of ConstantFolder for VPlan, and use it in VPBuilder to constant-fold whenever the underlying IR values of all operands passed to a builder API are constants.
1 parent 734120d commit 2df7409

File tree

5 files changed

+131
-61
lines changed

5 files changed

+131
-61
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
2626

2727
#include "VPlan.h"
28+
#include "VPlanConstantFolder.h"
2829
#include "llvm/Support/InstructionCost.h"
2930

3031
namespace llvm {
@@ -45,7 +46,8 @@ struct VFRange;
4546
class VPBuilder {
4647
VPBasicBlock *BB = nullptr;
4748
VPBasicBlock::iterator InsertPt = VPBasicBlock::iterator();
48-
[[maybe_unused]] VPlan &Plan;
49+
VPlan &Plan;
50+
VPConstantFolder Folder;
4951

5052
/// Insert \p VPI in BB at InsertPt if BB is set.
5153
template <typename T> T *tryInsertInstruction(T *R) {
@@ -178,31 +180,40 @@ class VPBuilder {
178180

179181
VPValue *createNot(VPValue *Operand, DebugLoc DL = {},
180182
const Twine &Name = "") {
183+
if (auto *V = Folder.foldNot(Operand))
184+
return Plan.getOrAddLiveIn(V);
181185
return createInstruction(VPInstruction::Not, {Operand}, DL, Name);
182186
}
183187

184188
VPValue *createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
185189
const Twine &Name = "") {
190+
if (auto *V = Folder.foldAnd(LHS, RHS))
191+
return Plan.getOrAddLiveIn(V);
186192
return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, DL, Name);
187193
}
188194

189195
VPValue *createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
190196
const Twine &Name = "") {
191-
197+
if (auto *V = Folder.foldOr(LHS, RHS))
198+
return Plan.getOrAddLiveIn(V);
192199
return tryInsertInstruction(new VPInstruction(
193200
Instruction::BinaryOps::Or, {LHS, RHS},
194201
VPRecipeWithIRFlags::DisjointFlagsTy(false), DL, Name));
195202
}
196203

197204
VPValue *createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
198205
const Twine &Name = "") {
206+
if (auto *V = Folder.foldLogicalAnd(LHS, RHS))
207+
return Plan.getOrAddLiveIn(V);
199208
return tryInsertInstruction(
200209
new VPInstruction(VPInstruction::LogicalAnd, {LHS, RHS}, DL, Name));
201210
}
202211

203212
VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal,
204213
DebugLoc DL = {}, const Twine &Name = "",
205214
std::optional<FastMathFlags> FMFs = std::nullopt) {
215+
if (auto *V = Folder.foldSelect(Cond, TrueVal, FalseVal))
216+
return Plan.getOrAddLiveIn(V);
206217
auto *Select =
207218
FMFs ? new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal},
208219
*FMFs, DL, Name)
@@ -218,17 +229,23 @@ class VPBuilder {
218229
DebugLoc DL = {}, const Twine &Name = "") {
219230
assert(Pred >= CmpInst::FIRST_ICMP_PREDICATE &&
220231
Pred <= CmpInst::LAST_ICMP_PREDICATE && "invalid predicate");
232+
if (auto *V = Folder.foldCmp(Pred, A, B))
233+
return Plan.getOrAddLiveIn(V);
221234
return tryInsertInstruction(
222235
new VPInstruction(Instruction::ICmp, Pred, A, B, DL, Name));
223236
}
224237

225-
VPInstruction *createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL = {},
226-
const Twine &Name = "") {
238+
VPValue *createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL = {},
239+
const Twine &Name = "") {
240+
if (auto *V = Folder.foldPtrAdd(Ptr, Offset, GEPNoWrapFlags::none()))
241+
return Plan.getOrAddLiveIn(V);
227242
return tryInsertInstruction(
228243
new VPInstruction(Ptr, Offset, GEPNoWrapFlags::none(), DL, Name));
229244
}
230245
VPValue *createInBoundsPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL = {},
231246
const Twine &Name = "") {
247+
if (auto *V = Folder.foldPtrAdd(Ptr, Offset, GEPNoWrapFlags::inBounds()))
248+
return Plan.getOrAddLiveIn(V);
232249
return tryInsertInstruction(
233250
new VPInstruction(Ptr, Offset, GEPNoWrapFlags::inBounds(), DL, Name));
234251
}
@@ -244,14 +261,18 @@ class VPBuilder {
244261
new VPDerivedIVRecipe(Kind, FPBinOp, Start, Current, Step, Name));
245262
}
246263

247-
VPScalarCastRecipe *createScalarCast(Instruction::CastOps Opcode, VPValue *Op,
248-
Type *ResultTy, DebugLoc DL) {
264+
/// Create a scalar cast of \p Op to \p ResultTy using cast opcode \p Opcode.
/// If the constant folder can fold it, the folded IR value is returned as a
/// live-in of the plan; otherwise a VPScalarCastRecipe is inserted.
VPValue *createScalarCast(Instruction::CastOps Opcode, VPValue *Op,
                          Type *ResultTy, DebugLoc DL) {
  auto *Folded = Folder.foldCast(Opcode, Op, ResultTy);
  if (Folded)
    return Plan.getOrAddLiveIn(Folded);

  auto *CastRecipe = new VPScalarCastRecipe(Opcode, Op, ResultTy, DL);
  return tryInsertInstruction(CastRecipe);
}
252271

253-
VPWidenCastRecipe *createWidenCast(Instruction::CastOps Opcode, VPValue *Op,
254-
Type *ResultTy) {
272+
/// Create a widened cast of \p Op to \p ResultTy using cast opcode \p Opcode.
/// If the constant folder can fold it, the folded IR value is returned as a
/// live-in of the plan; otherwise a VPWidenCastRecipe is inserted.
VPValue *createWidenCast(Instruction::CastOps Opcode, VPValue *Op,
                         Type *ResultTy) {
  auto *Folded = Folder.foldCast(Opcode, Op, ResultTy);
  if (Folded)
    return Plan.getOrAddLiveIn(Folded);

  auto *CastRecipe = new VPWidenCastRecipe(Opcode, Op, ResultTy);
  return tryInsertInstruction(CastRecipe);
}
257278

llvm/lib/Transforms/Vectorize/VPlanConstantFolder.h

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
//===- VPlanConstantFolder.h - ConstantFolder for VPlan -------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANCONSTANTFOLDER_H
#define LLVM_TRANSFORMS_VECTORIZE_VPLANCONSTANTFOLDER_H

#include "VPlanValue.h"
#include "llvm/IR/ConstantFold.h"
#include "llvm/IR/Constants.h"

namespace llvm {
/// Constant folder operating on VPValues. A fold is only attempted when the
/// underlying IR value of every operand is a Constant. Each fold* member
/// returns the resulting IR value, or nullptr if at least one operand has no
/// constant underlying value (or the underlying IR folder yields no result).
class VPConstantFolder {
private:
  /// Return the underlying IR value of \p V if it is a Constant, nullptr
  /// otherwise (including when \p V has no underlying value).
  Constant *getIRConstant(VPValue *V) const {
    return dyn_cast_or_null<Constant>(V->getUnderlyingValue());
  }

  /// Fold the binary operation \p Opcode over \p LHS and \p RHS. Opcodes for
  /// which ConstantExpr::isDesirableBinOp holds are built as constant
  /// expressions; all others go through ConstantFoldBinaryInstruction, whose
  /// result is returned as-is.
  Value *foldBinOp(Instruction::BinaryOps Opcode, VPValue *LHS,
                   VPValue *RHS) const {
    auto *LC = getIRConstant(LHS);
    auto *RC = getIRConstant(RHS);
    if (!LC || !RC)
      return nullptr;
    if (ConstantExpr::isDesirableBinOp(Opcode))
      return ConstantExpr::get(Opcode, LC, RC);
    return ConstantFoldBinaryInstruction(Opcode, LC, RC);
  }

public:
  /// Fold the bitwise AND of \p LHS and \p RHS.
  Value *foldAnd(VPValue *LHS, VPValue *RHS) const {
    return foldBinOp(Instruction::BinaryOps::And, LHS, RHS);
  }

  /// Fold the bitwise OR of \p LHS and \p RHS.
  Value *foldOr(VPValue *LHS, VPValue *RHS) const {
    return foldBinOp(Instruction::BinaryOps::Or, LHS, RHS);
  }

  /// Fold the negation of \p Op, expressed as an xor with the all-ones value
  /// of the operand's type.
  Value *foldNot(VPValue *Op) const {
    auto *C = getIRConstant(Op);
    if (!C)
      return nullptr;
    return ConstantExpr::get(Instruction::BinaryOps::Xor, C,
                             Constant::getAllOnesValue(C->getType()));
  }

  /// Fold the logical AND of \p LHS and \p RHS, expressed as
  /// select LHS, RHS, zero.
  Value *foldLogicalAnd(VPValue *LHS, VPValue *RHS) const {
    auto *LC = getIRConstant(LHS);
    auto *RC = getIRConstant(RHS);
    if (!LC || !RC)
      return nullptr;
    // getNullValue is declared on Constant; name it through that class.
    return ConstantFoldSelectInstruction(
        LC, RC, Constant::getNullValue(RC->getType()));
  }

  /// Fold a select of \p TrueVal / \p FalseVal on \p Cond. All three operands
  /// must be constant for a fold to be attempted.
  Value *foldSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal) const {
    auto *CC = getIRConstant(Cond);
    auto *TV = getIRConstant(TrueVal);
    auto *FV = getIRConstant(FalseVal);
    if (!CC || !TV || !FV)
      return nullptr;
    return ConstantFoldSelectInstruction(CC, TV, FV);
  }

  /// Fold the comparison of \p LHS and \p RHS with predicate \p Pred.
  Value *foldCmp(CmpInst::Predicate Pred, VPValue *LHS, VPValue *RHS) const {
    auto *LC = getIRConstant(LHS);
    auto *RC = getIRConstant(RHS);
    if (!LC || !RC)
      return nullptr;
    return ConstantFoldCompareInstruction(Pred, LC, RC);
  }

  /// Fold a pointer add of \p Offset to \p Base with no-wrap flags \p NW,
  /// expressed as an i8 getelementptr constant expression.
  Value *foldPtrAdd(VPValue *Base, VPValue *Offset, GEPNoWrapFlags NW) const {
    auto *BC = getIRConstant(Base);
    auto *OC = getIRConstant(Offset);
    if (!BC || !OC)
      return nullptr;
    auto &Ctx = BC->getType()->getContext();
    return ConstantExpr::getGetElementPtr(Type::getInt8Ty(Ctx), BC, OC, NW);
  }

  /// Fold the cast \p Opcode of \p Op to \p DestTy. Opcodes for which
  /// ConstantExpr::isDesirableCastOp holds are built as constant expressions;
  /// all others go through ConstantFoldCastInstruction, whose result is
  /// returned as-is.
  Value *foldCast(Instruction::CastOps Opcode, VPValue *Op,
                  Type *DestTy) const {
    auto *C = getIRConstant(Op);
    if (!C)
      return nullptr;
    if (ConstantExpr::isDesirableCastOp(Opcode))
      return ConstantExpr::getCast(Opcode, C, DestTy);
    return ConstantFoldCastInstruction(Opcode, C, DestTy);
  }
};
} // namespace llvm

#endif // LLVM_TRANSFORMS_VECTORIZE_VPLANCONSTANTFOLDER_H

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -533,8 +533,8 @@ createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind,
533533
VPBuilder &Builder) {
534534
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
535535
VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
536-
VPSingleDefRecipe *BaseIV = Builder.createDerivedIV(
537-
Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx");
536+
VPValue *BaseIV = Builder.createDerivedIV(Kind, FPBinOp, StartV, CanonicalIV,
537+
Step, "offset.idx");
538538

539539
// Truncate base induction if needed.
540540
Type *CanonicalIVType = CanonicalIV->getScalarType();

llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll

Lines changed: 2 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -12,56 +12,10 @@ define void @replicate_udiv_with_only_first_lane_used(i32 %x, ptr %dst, i64 %d)
1212
; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X]], 10
1313
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1414
; CHECK: [[VECTOR_PH]]:
15+
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[DST]], i64 0
1516
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
1617
; CHECK: [[VECTOR_BODY]]:
17-
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_UDIV_CONTINUE14:.*]] ]
18-
; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF:.*]], label %[[PRED_UDIV_CONTINUE:.*]]
19-
; CHECK: [[PRED_UDIV_IF]]:
20-
; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 99, [[D]]
21-
; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE]]
22-
; CHECK: [[PRED_UDIV_CONTINUE]]:
23-
; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ poison, %[[VECTOR_BODY]] ], [ [[TMP0]], %[[PRED_UDIV_IF]] ]
24-
; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF1:.*]], label %[[PRED_UDIV_CONTINUE2:.*]]
25-
; CHECK: [[PRED_UDIV_IF1]]:
26-
; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 99, [[D]]
27-
; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE2]]
28-
; CHECK: [[PRED_UDIV_CONTINUE2]]:
29-
; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF3:.*]], label %[[PRED_UDIV_CONTINUE4:.*]]
30-
; CHECK: [[PRED_UDIV_IF3]]:
31-
; CHECK-NEXT: [[TMP3:%.*]] = udiv i64 99, [[D]]
32-
; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE4]]
33-
; CHECK: [[PRED_UDIV_CONTINUE4]]:
34-
; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF5:.*]], label %[[PRED_UDIV_CONTINUE6:.*]]
35-
; CHECK: [[PRED_UDIV_IF5]]:
36-
; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 99, [[D]]
37-
; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE6]]
38-
; CHECK: [[PRED_UDIV_CONTINUE6]]:
39-
; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF7:.*]], label %[[PRED_UDIV_CONTINUE8:.*]]
40-
; CHECK: [[PRED_UDIV_IF7]]:
41-
; CHECK-NEXT: [[TMP5:%.*]] = udiv i64 99, [[D]]
42-
; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE8]]
43-
; CHECK: [[PRED_UDIV_CONTINUE8]]:
44-
; CHECK-NEXT: [[TMP6:%.*]] = phi i64 [ poison, %[[PRED_UDIV_CONTINUE6]] ], [ [[TMP5]], %[[PRED_UDIV_IF7]] ]
45-
; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF9:.*]], label %[[PRED_UDIV_CONTINUE10:.*]]
46-
; CHECK: [[PRED_UDIV_IF9]]:
47-
; CHECK-NEXT: [[TMP7:%.*]] = udiv i64 99, [[D]]
48-
; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE10]]
49-
; CHECK: [[PRED_UDIV_CONTINUE10]]:
50-
; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF11:.*]], label %[[PRED_UDIV_CONTINUE12:.*]]
51-
; CHECK: [[PRED_UDIV_IF11]]:
52-
; CHECK-NEXT: [[TMP8:%.*]] = udiv i64 99, [[D]]
53-
; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE12]]
54-
; CHECK: [[PRED_UDIV_CONTINUE12]]:
55-
; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF13:.*]], label %[[PRED_UDIV_CONTINUE14]]
56-
; CHECK: [[PRED_UDIV_IF13]]:
57-
; CHECK-NEXT: [[TMP9:%.*]] = udiv i64 99, [[D]]
58-
; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE14]]
59-
; CHECK: [[PRED_UDIV_CONTINUE14]]:
60-
; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 true, i64 0, i64 [[TMP1]]
61-
; CHECK-NEXT: [[PREDPHI15:%.*]] = select i1 true, i64 0, i64 [[TMP6]]
62-
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[DST]], i64 [[PREDPHI]]
63-
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[DST]], i64 [[PREDPHI15]]
64-
; CHECK-NEXT: store i16 0, ptr [[TMP10]], align 2
18+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
6519
; CHECK-NEXT: store i16 0, ptr [[TMP11]], align 2
6620
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
6721
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96

llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -202,15 +202,14 @@ exit:
202202
; DBG-NEXT: Successor(s): vector.ph
203203
; DBG-EMPTY:
204204
; DBG-NEXT: vector.ph:
205-
; DBG-NEXT: SCALAR-CAST vp<[[CAST:%.+]]> = trunc ir<1> to i32
206205
; DBG-NEXT: Successor(s): vector loop
207206
; DBG-EMPTY:
208207
; DBG-NEXT: <x1> vector loop: {
209208
; DBG-NEXT: vector.body:
210-
; DBG-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
209+
; DBG-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[INDEX_NEXT:%.*]]>
211210
; DBG-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for> = phi ir<0>, vp<[[SCALAR_STEPS:.+]]>
212211
; DBG-NEXT: SCALAR-CAST vp<[[TRUNC_IV:%.+]]> = trunc vp<[[CAN_IV]]> to i32
213-
; DBG-NEXT: vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[TRUNC_IV]]>, vp<[[CAST]]>
212+
; DBG-NEXT: vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[TRUNC_IV]]>, ir<1>
214213
; DBG-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%for>, vp<[[SCALAR_STEPS]]>
215214
; DBG-NEXT: CLONE store vp<[[SPLICE]]>, ir<%dst>
216215
; DBG-NEXT: EMIT vp<[[IV_INC:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>

0 commit comments

Comments
 (0)