Skip to content

Commit 16b3d9e

Browse files
authored
[SYCL][Fusion] Adapt internalization to opaque pointers. (#10882)
Fixes `sycl/test-e2e/KernelFusion/internalize_array_wrapper.cpp` which was temporarily deactivated in #9828. Switching to opaque pointers meant that accesses to the data structure in the testcase were no longer represented as one "deep" GEP, but rather a series of GEPs into the different nesting levels of the structure. Rather than bailing out, we now keep track of GEPs that index into an aggregate object, and exclude their users from the modulo-`LocalSize` rewriting (the pointer types still have to be changed to the desired target address space, though). In addition, perform `-simplifycfg` before internalization in order to get rid of unreachable code which, in this particular case, contained Phi nodes on the candidate accessor. --------- Signed-off-by: Julian Oppermann <[email protected]>
1 parent bbbe34d commit 16b3d9e

File tree

7 files changed

+596
-43
lines changed

7 files changed

+596
-43
lines changed

sycl-fusion/jit-compiler/lib/fusion/FusionPipeline.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,9 @@ FusionPipeline::runFusionPasses(Module &Mod, SYCLModuleInfo &InputInfo,
105105
// Ideally, the static compiler should have performed that job.
106106
const unsigned FlatAddressSpace = getFlatAddressSpace(Mod);
107107
FPM.addPass(InferAddressSpacesPass(FlatAddressSpace));
108+
// Run CFG simplification to prevent unreachable code from obscuring
109+
// internalization opportunities.
110+
FPM.addPass(SimplifyCFGPass{});
108111
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
109112
}
110113
// Run dataflow internalization and runtime constant propagation.

sycl-fusion/passes/internalization/Internalization.cpp

Lines changed: 52 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,10 @@ struct SYCLInternalizerImpl {
5858
/// - Promote the function and call the new function instead,
5959
/// keeping the original function.
6060
/// - The value appears in a load/store operation: Do nothing
61-
void promoteValue(Value *Val, std::size_t LocalSize) const;
61+
void promoteValue(Value *Val, std::size_t LocalSize, bool InAggregate) const;
6262

6363
void promoteGEPI(GetElementPtrInst *GEPI, const Value *Val,
64-
std::size_t LocalSize) const;
64+
std::size_t LocalSize, bool InAggregate) const;
6565

6666
void promoteCall(CallBase *C, const Value *Val, std::size_t LocalSize) const;
6767

@@ -81,23 +81,26 @@ struct SYCLInternalizerImpl {
8181
///
8282
/// Check that a value can be promoted.
8383
/// For GEP and Call instructions, delegate to the specific implementations.
84+
/// \p InAggregate indicates that at least one GEP instruction addressing into
85+
/// an aggregate object was encountered, hence \p Val no longer represents a
86+
/// pure offset computation on the original candidate argument.
8487
/// For address-space casts, pointer-to-int conversions and unknown users,
8588
/// return an error.
86-
Error canPromoteValue(Value *Val, size_t LocalSize) const;
89+
Error canPromoteValue(Value *Val, size_t LocalSize, bool InAggregate) const;
8790

8891
///
89-
/// Check that the operand of a GEP can be promoted.
90-
/// If the GEP uses more than one index, return an error.
91-
/// Otherwise, check if the GEP itself can be promoted in its users.
92+
/// Check that the operand of a GEP can be promoted to its users, and
93+
/// propagate whether it represents a pointer into an aggregate object.
9294
Error canPromoteGEP(GetElementPtrInst *GEPI, const Value *Val,
93-
size_t LocalSize) const;
95+
size_t LocalSize, bool InAggregate) const;
9496

9597
///
9698
/// Check if operand to a function call can be promoted.
97-
/// If the function returns a pointer, return an error.
98-
/// Otherwise, check if the corresponding formal parameter can be promoted in
99-
/// the function body.
100-
Error canPromoteCall(CallBase *C, const Value *Val, size_t LocalSize) const;
99+
/// If the function returns a pointer, or the operand points into an aggregate
100+
/// object, return an error. Otherwise, check if the corresponding formal
101+
/// parameter can be promoted in the function body.
102+
Error canPromoteCall(CallBase *C, const Value *Val, size_t LocalSize,
103+
bool InAggregate) const;
101104

102105
Error checkArgsPromotable(Function *F,
103106
SmallVectorImpl<size_t> &PromoteArgSizes) const;
@@ -212,7 +215,8 @@ getUsagesInternalization(const User *U, const Value *V, std::size_t LocalSize) {
212215
}
213216

214217
Error SYCLInternalizerImpl::canPromoteCall(CallBase *C, const Value *Val,
215-
size_t LocalSize) const {
218+
size_t LocalSize,
219+
bool InAggregate) const {
216220
if (isa<PointerType>(C->getType())) {
217221
// With opaque pointers, we do not have the necessary information to compare
218222
// the element-type of the pointer returned by the function and the element
@@ -222,6 +226,12 @@ Error SYCLInternalizerImpl::canPromoteCall(CallBase *C, const Value *Val,
222226
inconvertibleErrorCode(),
223227
"It is not safe to promote a called function which returns a pointer.");
224228
}
229+
if (InAggregate) {
230+
return createStringError(
231+
inconvertibleErrorCode(),
232+
"Promotion of a pointer into an aggregate object to a called function "
233+
"is currently not supported.");
234+
}
225235

226236
SmallVector<size_t> InternInfo = getUsagesInternalization(C, Val, LocalSize);
227237
assert(!InternInfo.empty() && "Value must be used at least once");
@@ -232,27 +242,23 @@ Error SYCLInternalizerImpl::canPromoteCall(CallBase *C, const Value *Val,
232242
}
233243

234244
Error SYCLInternalizerImpl::canPromoteGEP(GetElementPtrInst *GEPI,
235-
const Value *Val,
236-
size_t LocalSize) const {
245+
const Value *Val, size_t LocalSize,
246+
bool InAggregate) const {
237247
if (cast<PointerType>(GEPI->getType())->getAddressSpace() == AS) {
238248
// If the GEPI is already using the correct address-space, no change is
239249
// required.
240250
return Error::success();
241251
}
242-
if (GEPI->getNumIndices() != 1 &&
243-
std::any_of(GEPI->user_begin(), GEPI->user_end(), [](const auto *User) {
244-
return isa<GetElementPtrInst>(User);
245-
})) {
246-
return createStringError(inconvertibleErrorCode(),
247-
"Only one index expected in source of "
248-
"promotable GEP instruction pointer argument");
249-
}
250-
// Recurse to check all users of the GEP.
251-
return canPromoteValue(GEPI, LocalSize);
252+
// Recurse to check all users of the GEP. We are either already in
253+
// `InAggregate` mode, or inspect the current instruction. Recall that a GEP's
254+
// first index is used to step through the base pointer, whereas any
255+
// additional indices represent addressing into an aggregate type.
256+
return canPromoteValue(GEPI, LocalSize,
257+
InAggregate || GEPI->getNumIndices() >= 2);
252258
}
253259

254-
Error SYCLInternalizerImpl::canPromoteValue(Value *Val,
255-
size_t LocalSize) const {
260+
Error SYCLInternalizerImpl::canPromoteValue(Value *Val, size_t LocalSize,
261+
bool InAggregate) const {
256262
for (auto *U : Val->users()) {
257263
auto *I = dyn_cast<Instruction>(U);
258264
if (!I) {
@@ -272,13 +278,14 @@ Error SYCLInternalizerImpl::canPromoteValue(Value *Val,
272278
case Instruction::Call:
273279
case Instruction::Invoke:
274280
case Instruction::CallBr:
275-
if (auto Err = canPromoteCall(cast<CallBase>(I), Val, LocalSize)) {
281+
if (auto Err =
282+
canPromoteCall(cast<CallBase>(I), Val, LocalSize, InAggregate)) {
276283
return Err;
277284
}
278285
break;
279286
case Instruction::GetElementPtr:
280-
if (auto Err =
281-
canPromoteGEP(cast<GetElementPtrInst>(I), Val, LocalSize)) {
287+
if (auto Err = canPromoteGEP(cast<GetElementPtrInst>(I), Val, LocalSize,
288+
InAggregate)) {
282289
return Err;
283290
}
284291
break;
@@ -316,7 +323,7 @@ Error SYCLInternalizerImpl::checkArgsPromotable(
316323
PromoteArgSizes[Index] = 0;
317324
continue;
318325
}
319-
if (auto Err = canPromoteValue(Arg, LocalSize)) {
326+
if (auto Err = canPromoteValue(Arg, LocalSize, /*InAggregate=*/false)) {
320327
// Set the local size to 0 to indicate that this argument should not be
321328
// promoted.
322329
PromoteArgSizes[Index] = 0;
@@ -360,30 +367,37 @@ void SYCLInternalizerImpl::promoteCall(CallBase *C, const Value *Val,
360367
}
361368

362369
void SYCLInternalizerImpl::promoteGEPI(GetElementPtrInst *GEPI,
363-
const Value *Val,
364-
std::size_t LocalSize) const {
370+
const Value *Val, std::size_t LocalSize,
371+
bool InAggregate) const {
365372
// A non-PointerType is unreachable here. The other case is caught in the caller.
366373
if (cast<PointerType>(GEPI->getType())->getAddressSpace() != AS) {
367-
remapIndices(GEPI, LocalSize);
374+
if (!InAggregate)
375+
remapIndices(GEPI, LocalSize);
368376
auto *ValTy = cast<PointerType>(Val->getType());
369377
GEPI->mutateType(PointerType::getWithSamePointeeType(
370378
cast<PointerType>(GEPI->getType()), ValTy->getAddressSpace()));
371-
return promoteValue(GEPI, LocalSize);
379+
// Recurse to promote to all users of the GEP. We are either already in
380+
// `InAggregate` mode, or inspect the current instruction. Recall that a
381+
// GEP's first index is used to step through the base pointer, whereas any
382+
// additional indices represent addressing into an aggregate type.
383+
return promoteValue(GEPI, LocalSize,
384+
InAggregate || GEPI->getNumIndices() >= 2);
372385
}
373386
}
374387

375-
void SYCLInternalizerImpl::promoteValue(Value *Val,
376-
std::size_t LocalSize) const {
388+
void SYCLInternalizerImpl::promoteValue(Value *Val, std::size_t LocalSize,
389+
bool InAggregate) const {
377390
for (auto *U : Val->users()) {
378391
auto *I = cast<Instruction>(U);
379392
switch (I->getOpcode()) {
380393
case Instruction::Call:
381394
case Instruction::Invoke:
382395
case Instruction::CallBr:
396+
assert(!InAggregate);
383397
promoteCall(cast<CallBase>(I), Val, LocalSize);
384398
break;
385399
case Instruction::GetElementPtr:
386-
promoteGEPI(cast<GetElementPtrInst>(I), Val, LocalSize);
400+
promoteGEPI(cast<GetElementPtrInst>(I), Val, LocalSize, InAggregate);
387401
break;
388402
case Instruction::Load:
389403
case Instruction::Store:
@@ -535,7 +549,7 @@ Function *SYCLInternalizerImpl::promoteFunctionArgs(
535549
if (CreateAllocas) {
536550
Arg = replaceByNewAlloca(cast<Argument>(Arg), AS, LocalSize);
537551
}
538-
promoteValue(Arg, LocalSize);
552+
promoteValue(Arg, LocalSize, /*InAggregate=*/false);
539553
}
540554

541555
TargetInfo.updateAddressSpaceMetadata(NewF, PromoteToLocal, AS);
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
; RUN: opt -load-pass-plugin %shlibdir/SYCLKernelFusion%shlibext \
2+
; RUN: -passes=sycl-internalization --sycl-info-path %S/abort-kernel-info.yaml -S %s | FileCheck %s
3+
4+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024"
5+
target triple = "spir64-unknown-unknown"
6+
7+
; Function Attrs: noinline
8+
define void @fun(ptr addrspace(1) %arg) #0 {
9+
ret void
10+
}
11+
12+
%struct = type { i32, i32, i32 }
13+
14+
; CHECK-LABEL: define {{[^@]+}}@fused_0
15+
; CHECK-SAME: (ptr addrspace(1) align 4 %[[ACC:.*]])
16+
define spir_kernel void @fused_0(ptr addrspace(1) align 4 %acc) !kernel_arg_addr_space !12 !kernel_arg_access_qual !13 !kernel_arg_type !14 !kernel_arg_type_qual !15 !kernel_arg_base_type !14 !kernel_arg_name !16 !sycl.kernel.promote !17 !sycl.kernel.promote.localsize !18 {
17+
; Scenario: Test private internalization is not performed when pointers into
18+
; aggregate objects are passed to function calls.
19+
20+
; CHECK-NOT: alloca [1 x %struct]
21+
%gep1 = getelementptr %struct, ptr addrspace(1) %acc, i64 17
22+
%gep2 = getelementptr %struct, ptr addrspace(1) %gep1, i64 0, i32 2
23+
call void @fun(ptr addrspace(1) %gep2)
24+
store i32 42, ptr addrspace(1) %gep2
25+
ret void
26+
}
27+
28+
attributes #0 = { noinline }
29+
30+
!12 = !{i32 1}
31+
!13 = !{!"none"}
32+
!14 = !{!"ptr"}
33+
!15 = !{!""}
34+
!16 = !{!"acc"}
35+
!17 = !{!"private"}
36+
!18 = !{i64 1}

0 commit comments

Comments
 (0)