Skip to content

Commit 422a5fa

Browse files
authored
[SYCL] Add unnamed lambda kernels support (#387)
Add support for kernels without a kernel name under the -fsycl-unnamed-lambda flag. The user activates the extension with the new flag. Uses the new __unique_stable_name compiler built-in. Only the version of the integration header that is needed is generated (rather than both versions guarded by #ifdef). Defines __SYCL_UNNAMED_LAMBDA__ when the extension is active. Signed-off-by: Roland Schulz <[email protected]> Signed-off-by: Alexey Bader <[email protected]>
1 parent 08a923f commit 422a5fa

File tree

12 files changed

+137
-23
lines changed

12 files changed

+137
-23
lines changed

clang/include/clang/Basic/LangOptions.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ LANGOPT(GPURelocatableDeviceCode, 1, 0, "generate relocatable device code")
226226
LANGOPT(SYCLIsDevice , 1, 0, "Generate code for SYCL device")
227227
LANGOPT(SYCLIsHost , 1, 0, "SYCL host compilation")
228228
LANGOPT(SYCLAllowFuncPtr , 1, 0, "Allow function pointers in SYCL device code")
229+
LANGOPT(SYCLUnnamedLambda , 1, 0, "Allow unnamed lambda SYCL kernels")
229230

230231
LANGOPT(SizedDeallocation , 1, 0, "sized deallocation")
231232
LANGOPT(AlignedAllocation , 1, 0, "aligned allocation")

clang/include/clang/Driver/Options.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1744,6 +1744,8 @@ def fno_sycl_use_bitcode : Flag<["-"], "fno-sycl-use-bitcode">,
17441744
Flags<[CC1Option]>, HelpText<"Use SPIR-V instead of LLVM bitcode in fat objects">;
17451745
def fsycl_link : Flag<["-"], "fsycl-link">,
17461746
Flags<[CC1Option]>, HelpText<"Generate partially linked device object to be used with the host link">;
1747+
def fsycl_unnamed_lambda : Flag<["-"], "fsycl-unnamed-lambda">,
1748+
Flags<[CC1Option]>, HelpText<"Allow unnamed SYCL lambda kernels">;
17471749
def fsyntax_only : Flag<["-"], "fsyntax-only">,
17481750
Flags<[DriverOption,CoreOption,CC1Option]>, Group<Action_Group>;
17491751
def ftabstop_EQ : Joined<["-"], "ftabstop=">, Group<f_Group>;

clang/include/clang/Sema/Sema.h

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,7 @@ class SYCLIntegrationHeader {
306306
};
307307

308308
public:
309-
SYCLIntegrationHeader(DiagnosticsEngine &Diag);
309+
SYCLIntegrationHeader(DiagnosticsEngine &Diag, bool UnnamedLambdaSupport);
310310

311311
/// Emits contents of the header into given stream.
312312
void emit(raw_ostream &Out);
@@ -317,7 +317,8 @@ class SYCLIntegrationHeader {
317317

318318
/// Signals that subsequent parameter descriptor additions will go to
319319
/// the kernel with given name. Starts new kernel invocation descriptor.
320-
void startKernel(StringRef KernelName, QualType KernelNameType);
320+
void startKernel(StringRef KernelName, QualType KernelNameType,
321+
StringRef KernelStableName);
321322

322323
/// Adds a kernel parameter descriptor to current kernel invocation
323324
/// descriptor.
@@ -352,6 +353,9 @@ class SYCLIntegrationHeader {
352353
/// Kernel name type.
353354
QualType NameType;
354355

356+
/// Kernel name with stable lambda name mangling
357+
std::string StableName;
358+
355359
/// Descriptor of kernel actual parameters.
356360
SmallVector<KernelParamDesc, 8> Params;
357361

@@ -387,6 +391,9 @@ class SYCLIntegrationHeader {
387391

388392
/// Used for emitting diagnostics.
389393
DiagnosticsEngine &Diag;
394+
395+
/// Whether header is generated with unnamed lambda support
396+
bool UnnamedLambdaSupport;
390397
};
391398

392399
/// Keeps track of expected type during expression parsing. The type is tied to
@@ -11404,7 +11411,7 @@ class Sema {
1140411411
SYCLIntegrationHeader &getSyclIntegrationHeader() {
1140511412
if (SyclIntHeader == nullptr)
1140611413
SyclIntHeader = llvm::make_unique<SYCLIntegrationHeader>(
11407-
getDiagnostics());
11414+
getDiagnostics(), getLangOpts().SYCLUnnamedLambda);
1140811415
return *SyclIntHeader.get();
1140911416
}
1141011417

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5373,6 +5373,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
53735373
HeaderOpt += Output.getFilename();
53745374
CmdArgs.push_back(Args.MakeArgString(HeaderOpt));
53755375
}
5376+
if (Args.hasArg(options::OPT_fsycl_unnamed_lambda))
5377+
CmdArgs.push_back("-fsycl-unnamed-lambda");
53765378
}
53775379

53785380
// OpenMP offloading device jobs take the argument -fopenmp-host-ir-file-path

clang/lib/Frontend/CompilerInvocation.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2991,6 +2991,7 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
29912991
Opts.SYCLIsHost = Args.hasArg(options::OPT_fsycl_is_host);
29922992
Opts.SYCLAllowFuncPtr = Args.hasFlag(options::OPT_fsycl_allow_func_ptr,
29932993
options::OPT_fno_sycl_allow_func_ptr, false);
2994+
Opts.SYCLUnnamedLambda = Args.hasArg(options::OPT_fsycl_unnamed_lambda);
29942995

29952996
// Set CUDA mode for OpenMP target NVPTX if specified in options
29962997
Opts.OpenMPCUDAMode = Opts.OpenMPIsDevice && T.isNVPTX() &&

clang/lib/Frontend/InitPreprocessor.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1077,6 +1077,8 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
10771077
if (!getenv("DISABLE_INFER_AS"))
10781078
Builder.defineMacro("__SYCL_ENABLE_INFER_AS__", "1");
10791079
}
1080+
if (LangOpts.SYCLUnnamedLambda)
1081+
Builder.defineMacro("__SYCL_UNNAMED_LAMBDA__", "1");
10801082

10811083
// OpenCL definitions.
10821084
if (LangOpts.OpenCL) {

clang/lib/Sema/SemaSYCL.cpp

Lines changed: 26 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -936,7 +936,9 @@ static void populateIntHeader(SYCLIntegrationHeader &H, const StringRef Name,
936936

937937
ASTContext &Ctx = KernelObjTy->getASTContext();
938938
const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(KernelObjTy);
939-
H.startKernel(Name, NameType);
939+
const std::string StableName = PredefinedExpr::ComputeName(
940+
Ctx, PredefinedExpr::UniqueStableNameExpr, NameType);
941+
H.startKernel(Name, NameType, StableName);
940942

941943
auto populateHeaderForAccessor = [&](const QualType &ArgTy, uint64_t Offset) {
942944
// The parameter is a SYCL accessor object.
@@ -1247,7 +1249,7 @@ void SYCLIntegrationHeader::emitFwdDecl(raw_ostream &O, const Decl *D) {
12471249
? cast<ClassTemplateDecl>(D)->getTemplatedDecl()
12481250
: dyn_cast<TagDecl>(D);
12491251

1250-
if (TD && TD->isCompleteDefinition()) {
1252+
if (TD && TD->isCompleteDefinition() && !UnnamedLambdaSupport) {
12511253
// defined class constituting the kernel name is not globally
12521254
// accessible - contradicts the spec
12531255
Diag.Report(D->getSourceRange().getBegin(),
@@ -1377,11 +1379,13 @@ void SYCLIntegrationHeader::emit(raw_ostream &O) {
13771379
O << "#include <CL/sycl/detail/kernel_desc.hpp>\n";
13781380

13791381
O << "\n";
1380-
O << "// Forward declarations of templated kernel function types:\n";
1382+
if (!UnnamedLambdaSupport) {
1383+
O << "// Forward declarations of templated kernel function types:\n";
13811384

1382-
llvm::SmallPtrSet<const void *, 4> Printed;
1383-
for (const KernelDesc &K : KernelDescs) {
1384-
emitForwardClassDecls(O, K.NameType, Printed);
1385+
llvm::SmallPtrSet<const void *, 4> Printed;
1386+
for (const KernelDesc &K : KernelDescs) {
1387+
emitForwardClassDecls(O, K.NameType, Printed);
1388+
}
13851389
}
13861390
O << "\n";
13871391

@@ -1444,19 +1448,21 @@ void SYCLIntegrationHeader::emit(raw_ostream &O) {
14441448
}
14451449
O << "};\n\n";
14461450

1447-
O << "// Specializations of this template class encompasses information\n";
1448-
O << "// about a kernel. The kernel is identified by the template\n";
1449-
O << "// parameter type.\n";
1450-
O << "template <class KernelNameType> struct KernelInfo;\n";
1451-
O << "\n";
1452-
14531451
O << "// Specializations of KernelInfo for kernel function types:\n";
14541452
CurStart = 0;
14551453

14561454
for (const KernelDesc &K : KernelDescs) {
14571455
const size_t N = K.Params.size();
1458-
O << "template <> struct KernelInfo<"
1459-
<< eraseAnonNamespace(K.NameType.getAsString()) << "> {\n";
1456+
if (UnnamedLambdaSupport) {
1457+
O << "template <> struct KernelInfoData<";
1458+
O << "'" << K.StableName.front();
1459+
for (char c : StringRef(K.StableName).substr(1))
1460+
O << "', '" << c;
1461+
O << "'> {\n";
1462+
} else {
1463+
O << "template <> struct KernelInfo<"
1464+
<< eraseAnonNamespace(K.NameType.getAsString()) << "> {\n";
1465+
}
14601466
O << " DLL_LOCAL\n";
14611467
O << " static constexpr const char* getName() { return \"" << K.Name
14621468
<< "\"; }\n";
@@ -1494,10 +1500,12 @@ bool SYCLIntegrationHeader::emit(const StringRef &IntHeaderName) {
14941500
}
14951501

14961502
void SYCLIntegrationHeader::startKernel(StringRef KernelName,
1497-
QualType KernelNameType) {
1503+
QualType KernelNameType,
1504+
StringRef KernelStableName) {
14981505
KernelDescs.resize(KernelDescs.size() + 1);
14991506
KernelDescs.back().Name = KernelName;
15001507
KernelDescs.back().NameType = KernelNameType;
1508+
KernelDescs.back().StableName = KernelStableName;
15011509
}
15021510

15031511
void SYCLIntegrationHeader::addParamDesc(kernel_param_kind_t Kind, int Info,
@@ -1515,8 +1523,9 @@ void SYCLIntegrationHeader::endKernel() {
15151523
// nop for now
15161524
}
15171525

1518-
SYCLIntegrationHeader::SYCLIntegrationHeader(DiagnosticsEngine &_Diag)
1519-
: Diag(_Diag) {}
1526+
SYCLIntegrationHeader::SYCLIntegrationHeader(DiagnosticsEngine &_Diag,
1527+
bool _UnnamedLambdaSupport)
1528+
: Diag(_Diag), UnnamedLambdaSupport(_UnnamedLambdaSupport) {}
15201529

15211530
bool Util::isSyclAccessorType(const QualType &Ty) {
15221531
return isSyclType(Ty, "accessor", true /*Tmpl*/);

clang/test/CodeGenSYCL/integration_header.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@
4747
// CHECK-EMPTY:
4848
// CHECK-NEXT: };
4949
//
50-
// CHECK: template <class KernelNameType> struct KernelInfo;
5150
// CHECK: template <> struct KernelInfo<class first_kernel> {
5251
// CHECK: template <> struct KernelInfo<::second_namespace::second_kernel<char>> {
5352
// CHECK: template <> struct KernelInfo<::third_kernel<1, int, ::point<X> >> {

clang/test/CodeGenSYCL/wrapped-accessor.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,6 @@
2727
// CHECK-NEXT: 0 // _ZTSZ4mainE14wrapped_access
2828
// CHECK-NEXT: };
2929

30-
// CHECK: template <class KernelNameType> struct KernelInfo;
31-
3230
// CHECK: template <> struct KernelInfo<class wrapped_access> {
3331

3432
#include <sycl.hpp>

sycl/include/CL/sycl/detail/kernel_desc.hpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ struct kernel_param_desc_t {
5151
int offset;
5252
};
5353

54+
#ifndef __SYCL_UNNAMED_LAMBDA__
5455
template <class KernelNameType> struct KernelInfo {
5556
static constexpr unsigned getNumParams() { return 0; }
5657
static const kernel_param_desc_t &getParamDesc(int Idx) {
@@ -59,6 +60,29 @@ template <class KernelNameType> struct KernelInfo {
5960
}
6061
static constexpr const char *getName() { return ""; }
6162
};
63+
#else
64+
template <char...> struct KernelInfoData; // Should this have dummy impl?
65+
66+
// C++14-like index_sequence and make_index_sequence.
67+
// C++14 members that are not needed here (value_type, size) are not implemented.
68+
template <class T, T...> struct integer_sequence {};
69+
template <size_t... I> using index_sequence = integer_sequence<size_t, I...>;
70+
template <size_t N>
71+
using make_index_sequence = __make_integer_seq<integer_sequence, size_t, N>;
72+
73+
template <typename T> struct KernelInfoImpl {
74+
private:
75+
static constexpr auto n = __unique_stable_name(T);
76+
template <std::size_t... I>
77+
static KernelInfoData<n[I]...> impl(index_sequence<I...>) {
78+
return {};
79+
}
80+
81+
public:
82+
using type = decltype(impl(make_index_sequence<__builtin_strlen(n)>{}));
83+
};
84+
template <typename T> using KernelInfo = typename KernelInfoImpl<T>::type;
85+
#endif //__SYCL_UNNAMED_LAMBDA__
6286

6387
} // namespace detail
6488
} // namespace sycl

0 commit comments

Comments
 (0)