From 652caa8e1b6c4c5ec989879eaf526a704d1db79a Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Wed, 14 Aug 2024 15:16:05 -0700 Subject: [PATCH 001/107] Preliminary implementation of work_group_memory extension --- clang/include/clang/Basic/Attr.td | 4 ++-- .../SemaSYCL/Inputs/sycl/detail/kernel_desc.hpp | 1 + sycl/include/sycl/detail/kernel_desc.hpp | 1 + sycl/include/sycl/handler.hpp | 10 +++++++++- sycl/include/sycl/sycl.hpp | 1 + sycl/source/detail/scheduler/commands.cpp | 2 ++ sycl/source/feature_test.hpp.in | 1 + sycl/source/handler.cpp | 15 ++++++++++----- 8 files changed, 27 insertions(+), 8 deletions(-) diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 1dc61986be561..f3e2890cd1068 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1513,12 +1513,12 @@ def SYCLType: InheritableAttr { let Subjects = SubjectList<[CXXRecord, Enum], ErrorDiag>; let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost]; let Args = [EnumArgument<"Type", "SYCLType", /*is_string=*/true, - ["accessor", "local_accessor", + ["accessor", "local_accessor", "work_group_memory", "specialization_id", "kernel_handler", "buffer_location", "no_alias", "accessor_property_list", "group", "private_memory", "aspect", "annotated_ptr", "annotated_arg", "stream", "sampler", "host_pipe", "multi_ptr"], - ["accessor", "local_accessor", + ["accessor", "local_accessor", "work_group_memory", "specialization_id", "kernel_handler", "buffer_location", "no_alias", "accessor_property_list", "group", "private_memory", "aspect", "annotated_ptr", "annotated_arg", diff --git a/clang/test/SemaSYCL/Inputs/sycl/detail/kernel_desc.hpp b/clang/test/SemaSYCL/Inputs/sycl/detail/kernel_desc.hpp index 6ef9122d45209..820013ed0eff7 100644 --- a/clang/test/SemaSYCL/Inputs/sycl/detail/kernel_desc.hpp +++ b/clang/test/SemaSYCL/Inputs/sycl/detail/kernel_desc.hpp @@ -18,6 +18,7 @@ namespace detail { kind_pointer = 3, kind_specialization_constants_buffer = 
4, kind_stream = 5, + kind_work_group_memory = 6, kind_invalid = 0xf, // not a valid kernel kind }; diff --git a/sycl/include/sycl/detail/kernel_desc.hpp b/sycl/include/sycl/detail/kernel_desc.hpp index 85519c3388efd..535ab3d730298 100644 --- a/sycl/include/sycl/detail/kernel_desc.hpp +++ b/sycl/include/sycl/detail/kernel_desc.hpp @@ -42,6 +42,7 @@ enum class kernel_param_kind_t { kind_pointer = 3, kind_specialization_constants_buffer = 4, kind_stream = 5, + kind_work_group_memory = 6, kind_invalid = 0xf, // not a valid kernel kind }; diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 6f2e9f9fc19b7..c41055479a3ea 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -687,7 +688,10 @@ class __SYCL_EXPORT handler { setLocalAccessorArgHelper(ArgIndex, Arg); #endif } - + template + void setArgHelper(int ArgIndex, ext::oneapi::experimental::work_group_memory Arg) { + addArg(detail::kernel_param_kind_t::kind_work_group_memory, nullptr, Arg->size, ArgIndex); +} // setArgHelper for non local accessor argument. 
template @@ -2021,6 +2025,10 @@ class __SYCL_EXPORT handler { setArgHelper(ArgIndex, std::move(Arg)); } + template + void set_arg(int ArgIndex, ext::oneapi::experimental::work_group_memory Arg) { + setArgHelper(ArgIndex, std::move(Arg)); +} // set_arg for graph dynamic_parameters template void set_arg(int argIndex, diff --git a/sycl/include/sycl/sycl.hpp b/sycl/include/sycl/sycl.hpp index ab44642639b0d..fc5aff80487f8 100644 --- a/sycl/include/sycl/sycl.hpp +++ b/sycl/include/sycl/sycl.hpp @@ -100,6 +100,7 @@ #include #include #include +#include #include #include #include diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index f2ac3963b76c6..7e0fa1d372a7c 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2282,6 +2282,8 @@ void SetArgBasedOnType( const std::function &getMemAllocationFunc, const sycl::context &Context, detail::ArgDesc &Arg, size_t NextTrueIndex) { switch (Arg.MType) { + case kernel_param_kind_t::kind_work_group_memory: + break; case kernel_param_kind_t::kind_stream: break; case kernel_param_kind_t::kind_accessor: { diff --git a/sycl/source/feature_test.hpp.in b/sycl/source/feature_test.hpp.in index ac0ba886240e3..1cfbdaf53c525 100644 --- a/sycl/source/feature_test.hpp.in +++ b/sycl/source/feature_test.hpp.in @@ -112,6 +112,7 @@ inline namespace _V1 { #define SYCL_EXT_ONEAPI_RAW_KERNEL_ARG 1 #define SYCL_EXT_ONEAPI_PROFILING_TAG 1 #define SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND 1 +#define SYCL_EXT_ONEAPI_WORK_GROUP_MEMORY 1 #ifndef __has_include #define __has_include(x) 0 diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 95421a80a8ce5..d8bc8d2258d33 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "ur_api.h" #include "sycl/detail/helpers.hpp" +#include "ur_api.h" #include #include @@ -789,6 +789,11 @@ 
void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, } break; } + case kernel_param_kind_t::kind_work_group_memory: { + addArg(kernel_param_kind_t::kind_std_layout, nullptr, Size, + Index + IndexShift); + break; + } case kernel_param_kind_t::kind_sampler: { addArg(kernel_param_kind_t::kind_sampler, Ptr, sizeof(sampler), Index + IndexShift); @@ -1127,7 +1132,7 @@ void handler::ext_oneapi_copy( sycl_ext_oneapi_bindless_images>(); Desc.verify(); - MSrcPtr = reinterpret_cast(Src.raw_handle); + MSrcPtr = reinterpret_cast(Src.raw_handle); MDstPtr = Dest; ur_image_desc_t UrDesc = {}; @@ -1180,8 +1185,8 @@ void handler::ext_oneapi_copy( sycl_ext_oneapi_bindless_images>(); ImageDesc.verify(); - MSrcPtr = reinterpret_cast(Src.raw_handle); - MDstPtr = reinterpret_cast(Dest.raw_handle); + MSrcPtr = reinterpret_cast(Src.raw_handle); + MDstPtr = reinterpret_cast(Dest.raw_handle); ur_image_desc_t UrDesc = {}; UrDesc.stype = UR_STRUCTURE_TYPE_IMAGE_DESC; @@ -1235,7 +1240,7 @@ void handler::ext_oneapi_copy( sycl_ext_oneapi_bindless_images>(); SrcImgDesc.verify(); - MSrcPtr = reinterpret_cast(Src.raw_handle); + MSrcPtr = reinterpret_cast(Src.raw_handle); MDstPtr = Dest; ur_image_desc_t UrDesc = {}; From 76daf777145d70816f6395b0bb304637f4d5d421 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Wed, 14 Aug 2024 15:16:28 -0700 Subject: [PATCH 002/107] Preliminary implementation of work_group_memory extension --- .../oneapi/experimental/work_group_memory.hpp | 77 +++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp diff --git a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp new file mode 100644 index 0000000000000..4442ee4b5cc1d --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp @@ -0,0 +1,77 @@ +//===-------------------- work_group_memory.hpp 
---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include + +namespace sycl { +inline namespace _V1 { +namespace detail { +template struct is_unbounded_array : std::false_type {}; + +template struct is_unbounded_array : std::true_type {}; + +template +inline constexpr bool is_unbounded_array_v = is_unbounded_array::value; +} // namespace detail + +namespace ext::oneapi::experimental { + +template +class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory { +public: + using value_type = std::remove_all_extents_t; + work_group_memory() = default; + work_group_memory(const work_group_memory &rhs) = default; + work_group_memory &operator=(const work_group_memory &rhs) = default; +#ifndef __SYCL_DEVICE_ONLY__ + template >> + work_group_memory(handler &) : size{sizeof(DataT)} {} + template >> + work_group_memory(size_t num, handler &cgh) { + size = num * sizeof(std::remove_extent_t); + } +#endif +#ifdef __SYCL_DEVICE_ONLY__ + template + multi_ptr + get_multi_ptr() const { + return sycl::address_space_cast(ptr); + } + DataT *operator&() const { return reinterpret_cast(ptr); } + operator DataT &() const { return *(this->operator&()); } + template >> + const work_group_memory &operator=(const DataT &value) const { + *ptr = value; + return *this; + } + + void __init(decoratedPtr ptr) { this->ptr = ptr; } +#endif +private: +#ifdef __SYCL_DEVICE_ONLY__ + using decoratedPtr = typename detail::DecoratedType< + value_type, access : address_space::local_space>::type *; + decoratedPtr ptr; +#endif + size_t size; + friend class handler; +}; +} // namespace ext::oneapi::experimental +} // namespace _V1 + +template +struct is_device_copyable> + : std::true_type {}; + +} // 
namespace sycl From 21e082b1dc7867424b1d362864ad1422822b9e0a Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Fri, 16 Aug 2024 12:21:27 -0700 Subject: [PATCH 003/107] Implement work_group_memory extension --- clang/include/clang/Sema/SemaSYCL.h | 3 +- clang/lib/Sema/SemaSYCL.cpp | 68 +++++++++---------- .../oneapi/experimental/work_group_memory.hpp | 49 ++++++++----- sycl/include/sycl/handler.hpp | 11 +-- sycl/source/handler.cpp | 2 +- 5 files changed, 74 insertions(+), 59 deletions(-) diff --git a/clang/include/clang/Sema/SemaSYCL.h b/clang/include/clang/Sema/SemaSYCL.h index f285dbfb0baa8..b5cab369724b7 100644 --- a/clang/include/clang/Sema/SemaSYCL.h +++ b/clang/include/clang/Sema/SemaSYCL.h @@ -60,7 +60,8 @@ class SYCLIntegrationHeader { kind_pointer, kind_specialization_constants_buffer, kind_stream, - kind_last = kind_stream + kind_work_group_memory, + kind_last = kind_work_group_memory }; public: diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 75e0d5fd9eb9f..79c57356bdb82 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -23,8 +23,8 @@ #include "clang/Basic/Diagnostic.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/Version.h" -#include "clang/Sema/Initialization.h" #include "clang/Sema/Attr.h" +#include "clang/Sema/Initialization.h" #include "clang/Sema/ParsedAttr.h" #include "clang/Sema/Sema.h" #include "llvm/ADT/APSInt.h" @@ -393,8 +393,7 @@ bool SemaSYCL::isDeclAllowedInSYCLDeviceCode(const Decl *D) { return true; const DeclContext *DC = FD->getDeclContext(); - if (II && II->isStr("__spirv_ocl_printf") && - !FD->isDefined() && + if (II && II->isStr("__spirv_ocl_printf") && !FD->isDefined() && FD->getLanguageLinkage() == CXXLanguageLinkage && DC->getEnclosingNamespaceContext()->isTranslationUnit()) return true; @@ -665,9 +664,7 @@ class DiagDeviceFunction : public RecursiveASTVisitor { // Make sure we skip the condition of the case, since that is a constant // expression. 
- bool TraverseCaseStmt(CaseStmt *S) { - return TraverseStmt(S->getSubStmt()); - } + bool TraverseCaseStmt(CaseStmt *S) { return TraverseStmt(S->getSubStmt()); } // Skip checking the size expr, since a constant array type loc's size expr is // a constant expression. @@ -932,7 +929,8 @@ class SingleDeviceFunctionTracker { !KernelBody->hasAttr() && !KernelBody->hasAttr()) { KernelBody->addAttr(AlwaysInlineAttr::CreateImplicit( - KernelBody->getASTContext(), {}, AlwaysInlineAttr::Keyword_forceinline)); + KernelBody->getASTContext(), {}, + AlwaysInlineAttr::Keyword_forceinline)); } } @@ -1024,8 +1022,7 @@ class MarkWIScopeFnVisitor : public RecursiveASTVisitor { // not a member of sycl::group - continue search return true; auto Name = Callee->getName(); - if (Name != "wait_for" || - Callee->hasAttr()) + if (Name != "wait_for" || Callee->hasAttr()) return true; // it is a call to sycl::group::wait_for - mark the callee Callee->addAttr( @@ -1263,7 +1260,7 @@ class KernelObjVisitor { template void VisitUnionImpl(const CXXRecordDecl *Owner, ParentTy &Parent, - const CXXRecordDecl *Wrapper, HandlerTys &... Handlers) { + const CXXRecordDecl *Wrapper, HandlerTys &...Handlers) { (void)std::initializer_list{ (Handlers.enterUnion(Owner, Parent), 0)...}; VisitRecordHelper(Wrapper, Wrapper->fields(), Handlers...); @@ -1273,13 +1270,13 @@ class KernelObjVisitor { // These enable handler execution only when previous Handlers succeed. template - bool handleField(FieldDecl *FD, QualType FDTy, Tn &&... tn) { + bool handleField(FieldDecl *FD, QualType FDTy, Tn &&...tn) { bool result = true; (void)std::initializer_list{(result = result && tn(FD, FDTy), 0)...}; return result; } template - bool handleField(const CXXBaseSpecifier &BD, QualType BDTy, Tn &&... 
tn) { + bool handleField(const CXXBaseSpecifier &BD, QualType BDTy, Tn &&...tn) { bool result = true; std::initializer_list{(result = result && tn(BD, BDTy), 0)...}; return result; @@ -1324,7 +1321,7 @@ class KernelObjVisitor { template void visitComplexRecord(const CXXRecordDecl *Owner, ParentTy &Parent, const CXXRecordDecl *Wrapper, QualType RecordTy, - HandlerTys &... Handlers) { + HandlerTys &...Handlers) { (void)std::initializer_list{ (Handlers.enterStruct(Owner, Parent, RecordTy), 0)...}; VisitRecordHelper(Wrapper, Wrapper->bases(), Handlers...); @@ -1336,7 +1333,7 @@ class KernelObjVisitor { template void visitSimpleRecord(const CXXRecordDecl *Owner, ParentTy &Parent, const CXXRecordDecl *Wrapper, QualType RecordTy, - HandlerTys &... Handlers) { + HandlerTys &...Handlers) { (void)std::initializer_list{ (Handlers.handleNonDecompStruct(Owner, Parent, RecordTy), 0)...}; } @@ -1344,16 +1341,16 @@ class KernelObjVisitor { template void visitRecord(const CXXRecordDecl *Owner, ParentTy &Parent, const CXXRecordDecl *Wrapper, QualType RecordTy, - HandlerTys &... Handlers); + HandlerTys &...Handlers); template void VisitUnion(const CXXRecordDecl *Owner, ParentTy &Parent, - const CXXRecordDecl *Wrapper, HandlerTys &... Handlers); + const CXXRecordDecl *Wrapper, HandlerTys &...Handlers); template void VisitRecordHelper(const CXXRecordDecl *Owner, clang::CXXRecordDecl::base_class_const_range Range, - HandlerTys &... Handlers) { + HandlerTys &...Handlers) { for (const auto &Base : Range) { QualType BaseTy = Base.getType(); // Handle accessor class as base @@ -1370,14 +1367,14 @@ class KernelObjVisitor { template void VisitRecordHelper(const CXXRecordDecl *Owner, RecordDecl::field_range Range, - HandlerTys &... Handlers) { + HandlerTys &...Handlers) { VisitRecordFields(Owner, Handlers...); } template void visitArrayElementImpl(const CXXRecordDecl *Owner, FieldDecl *ArrayField, QualType ElementTy, uint64_t Index, - HandlerTys &... 
Handlers) { + HandlerTys &...Handlers) { (void)std::initializer_list{ (Handlers.nextElement(ElementTy, Index), 0)...}; visitField(Owner, ArrayField, ElementTy, Handlers...); @@ -1385,24 +1382,24 @@ class KernelObjVisitor { template void visitFirstArrayElement(const CXXRecordDecl *Owner, FieldDecl *ArrayField, - QualType ElementTy, HandlerTys &... Handlers) { + QualType ElementTy, HandlerTys &...Handlers) { visitArrayElementImpl(Owner, ArrayField, ElementTy, 0, Handlers...); } template void visitNthArrayElement(const CXXRecordDecl *Owner, FieldDecl *ArrayField, QualType ElementTy, uint64_t Index, - HandlerTys &... Handlers); + HandlerTys &...Handlers); template void visitSimpleArray(const CXXRecordDecl *Owner, FieldDecl *Field, - QualType ArrayTy, HandlerTys &... Handlers) { + QualType ArrayTy, HandlerTys &...Handlers) { (void)std::initializer_list{ (Handlers.handleSimpleArrayType(Field, ArrayTy), 0)...}; } template void visitComplexArray(const CXXRecordDecl *Owner, FieldDecl *Field, - QualType ArrayTy, HandlerTys &... Handlers) { + QualType ArrayTy, HandlerTys &...Handlers) { // Array workflow is: // handleArrayType // enterArray @@ -1434,7 +1431,7 @@ class KernelObjVisitor { template void visitField(const CXXRecordDecl *Owner, FieldDecl *Field, - QualType FieldTy, HandlerTys &... Handlers) { + QualType FieldTy, HandlerTys &...Handlers) { if (isSyclSpecialType(FieldTy, SemaSYCLRef)) KF_FOR_EACH(handleSyclSpecialType, Field, FieldTy); else if (FieldTy->isStructureOrClassType()) { @@ -1506,14 +1503,14 @@ class KernelObjVisitor { template void VisitRecordBases(const CXXRecordDecl *KernelFunctor, - HandlerTys &... Handlers) { + HandlerTys &...Handlers) { VisitRecordHelper(KernelFunctor, KernelFunctor->bases(), Handlers...); } // A visitor function that dispatches to functions as defined in // SyclKernelFieldHandler for the purposes of kernel generation. template - void VisitRecordFields(const CXXRecordDecl *Owner, HandlerTys &... 
Handlers) { + void VisitRecordFields(const CXXRecordDecl *Owner, HandlerTys &...Handlers) { for (const auto Field : Owner->fields()) visitField(Owner, Field, Field->getType(), Handlers...); } @@ -1716,7 +1713,7 @@ template struct AllTrue { template void KernelObjVisitor::VisitUnion(const CXXRecordDecl *Owner, ParentTy &Parent, const CXXRecordDecl *Wrapper, - Handlers &... handlers) { + Handlers &...handlers) { // Don't continue descending if none of the handlers 'care'. This could be 'if // constexpr' starting in C++17. Until then, we have to count on the // optimizer to realize "if (false)" is a dead branch. @@ -1730,7 +1727,7 @@ template void KernelObjVisitor::visitNthArrayElement(const CXXRecordDecl *Owner, FieldDecl *ArrayField, QualType ElementTy, uint64_t Index, - Handlers &... handlers) { + Handlers &...handlers) { // Don't continue descending if none of the handlers 'care'. This could be 'if // constexpr' starting in C++17. Until then, we have to count on the // optimizer to realize "if (false)" is a dead branch. @@ -1744,8 +1741,7 @@ void KernelObjVisitor::visitNthArrayElement(const CXXRecordDecl *Owner, template void KernelObjVisitor::visitRecord(const CXXRecordDecl *Owner, ParentTy &Parent, const CXXRecordDecl *Wrapper, - QualType RecordTy, - HandlerTys &... Handlers) { + QualType RecordTy, HandlerTys &...Handlers) { RecordDecl *RD = RecordTy->getAsRecordDecl(); assert(RD && "should not be null."); if (RD->hasAttr()) { @@ -1790,7 +1786,7 @@ void KernelObjVisitor::visitRecord(const CXXRecordDecl *Owner, ParentTy &Parent, template void KernelObjVisitor::visitArray(const CXXRecordDecl *Owner, FieldDecl *Field, - QualType ArrayTy, HandlerTys &... 
Handlers) { + QualType ArrayTy, HandlerTys &...Handlers) { if (Field->hasAttr()) { visitComplexArray(Owner, Field, ArrayTy, Handlers...); @@ -4480,6 +4476,7 @@ class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler { uint64_t Size; Size = SemaSYCLRef.getASTContext().getTypeSizeInChars(ParamTy).getQuantity(); + ParamTy->dump(); Header.addParamDesc(Kind, static_cast(Size), static_cast(CurOffset + OffsetAdj)); } @@ -4540,6 +4537,8 @@ class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler { CurOffset + offsetOf(FD, FieldTy)); } else if (SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::stream)) { addParam(FD, FieldTy, SYCLIntegrationHeader::kind_stream); + } else if (SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::work_group_memory)) { + addParam(FieldTy, SYCLIntegrationHeader::kind_work_group_memory, offsetOf(FD, FieldTy)); } else if (SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::sampler) || SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::annotated_ptr) || SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::annotated_arg)) { @@ -5672,6 +5671,7 @@ static const char *paramKind2Str(KernelParamKind K) { CASE(stream); CASE(specialization_constants_buffer); CASE(pointer); + CASE(work_group_memory); } return ""; @@ -6187,7 +6187,6 @@ void SYCLIntegrationHeader::emit(raw_ostream &O) { O << "\n"; } - O << "// names of all kernels defined in the corresponding source\n"; O << "static constexpr\n"; O << "const char* const kernel_names[] = {\n"; @@ -6669,7 +6668,7 @@ bool SYCLIntegrationFooter::emit(raw_ostream &OS) { for (const VarDecl *VD : GlobalVars) { VD = VD->getCanonicalDecl(); - // Skip if this isn't a SpecIdType, DeviceGlobal, or HostPipe. This + // Skip if this isn't a SpecIdType, DeviceGlobal, or HostPipe. This // can happen if it was a deduced type. 
if (!SemaSYCL::isSyclType(VD->getType(), SYCLTypeAttr::specialization_id) && !SemaSYCL::isSyclType(VD->getType(), SYCLTypeAttr::host_pipe) && @@ -6714,8 +6713,7 @@ bool SYCLIntegrationFooter::emit(raw_ostream &OS) { VD->getNameForDiagnostic(HostPipesOS, Policy, true); } HostPipesOS << ", \""; - HostPipesOS << SYCLUniqueStableIdExpr::ComputeName(S.getASTContext(), - VD); + HostPipesOS << SYCLUniqueStableIdExpr::ComputeName(S.getASTContext(), VD); HostPipesOS << "\");\n"; } else { EmittedFirstSpecConstant = true; diff --git a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp index 4442ee4b5cc1d..ba186446b4dbc 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp @@ -19,53 +19,68 @@ template struct is_unbounded_array : std::true_type {}; template inline constexpr bool is_unbounded_array_v = is_unbounded_array::value; + +class work_group_memory_impl { +public: + work_group_memory_impl() = default; + work_group_memory_impl(const work_group_memory_impl &rhs) = default; + work_group_memory_impl & + operator=(const work_group_memory_impl &rhs) = default; + work_group_memory_impl(size_t sz) : size{sz} {} + size_t size; +}; + } // namespace detail namespace ext::oneapi::experimental { template -class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory { +class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory + : sycl::detail::work_group_memory_impl { public: using value_type = std::remove_all_extents_t; + +private: + using element_type = std::remove_extent_t; + using decoratedPtr = typename sycl::detail::DecoratedType< + value_type, access::address_space::local_space>::type *; + +public: work_group_memory() = default; work_group_memory(const work_group_memory &rhs) = default; work_group_memory &operator=(const work_group_memory &rhs) = default; -#ifndef 
__SYCL_DEVICE_ONLY__ template >> - work_group_memory(handler &) : size{sizeof(DataT)} {} + work_group_memory(handler &) + : sycl::detail::work_group_memory_impl(sizeof(DataT)) {} template >> - work_group_memory(size_t num, handler &cgh) { - size = num * sizeof(std::remove_extent_t); - } -#endif -#ifdef __SYCL_DEVICE_ONLY__ + work_group_memory(size_t num, handler &cgh) + : sycl::detail::work_group_memory_impl( + num * sizeof(std::remove_extent_t)) {} template multi_ptr get_multi_ptr() const { return sycl::address_space_cast(ptr); } - DataT *operator&() const { return reinterpret_cast(ptr); } - operator DataT &() const { return *(this->operator&()); } + __attribute__((always_inline)) DataT *operator&() const { + return reinterpret_cast(ptr); + } + __attribute__((always_inline)) operator DataT &() const { + return *reinterpret_cast(ptr); + } template >> const work_group_memory &operator=(const DataT &value) const { *ptr = value; return *this; } - +#ifdef __SYCL_DEVICE_ONLY__ void __init(decoratedPtr ptr) { this->ptr = ptr; } #endif private: -#ifdef __SYCL_DEVICE_ONLY__ - using decoratedPtr = typename detail::DecoratedType< - value_type, access : address_space::local_space>::type *; decoratedPtr ptr; -#endif - size_t size; - friend class handler; }; } // namespace ext::oneapi::experimental } // namespace _V1 diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index c41055479a3ea..3f783dc35bcfc 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -689,8 +689,8 @@ class __SYCL_EXPORT handler { #endif } template - void setArgHelper(int ArgIndex, ext::oneapi::experimental::work_group_memory Arg) { - addArg(detail::kernel_param_kind_t::kind_work_group_memory, nullptr, Arg->size, ArgIndex); + void setArgHelper(int ArgIndex, ext::oneapi::experimental::work_group_memory &&Arg) { + addArg(detail::kernel_param_kind_t::kind_work_group_memory, &Arg, sizeof(Arg), ArgIndex); } // setArgHelper for non local accessor argument. 
template - void set_arg(int ArgIndex, ext::oneapi::experimental::work_group_memory Arg) { + void set_arg(int ArgIndex, ext::oneapi::experimental::work_group_memory &&Arg) { setArgHelper(ArgIndex, std::move(Arg)); } // set_arg for graph dynamic_parameters diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index d8bc8d2258d33..237970133b7e3 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -790,7 +790,7 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, break; } case kernel_param_kind_t::kind_work_group_memory: { - addArg(kernel_param_kind_t::kind_std_layout, nullptr, Size, + addArg(kernel_param_kind_t::kind_std_layout, nullptr, static_cast(Ptr)->size, Index + IndexShift); break; } From 025cbc4dd40705b01cbbc0f68563dff5c54eb4cf Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Mon, 19 Aug 2024 10:23:26 -0700 Subject: [PATCH 004/107] Implement work_group_memory extension --- .../oneapi/experimental/work_group_memory.hpp | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp index ba186446b4dbc..0a5fc480a6f62 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp @@ -41,7 +41,12 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory using value_type = std::remove_all_extents_t; private: - using element_type = std::remove_extent_t; + using reference_type = + std::conditional_t, + std::decay_t, DataT &>; + using pointer_type = + std::conditional_t, + std::decay_t, DataT *>; using decoratedPtr = typename sycl::detail::DecoratedType< value_type, access::address_space::local_space>::type *; @@ -64,11 +69,13 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory return sycl::address_space_cast(ptr); } - 
__attribute__((always_inline)) DataT *operator&() const { - return reinterpret_cast(ptr); - } - __attribute__((always_inline)) operator DataT &() const { - return *reinterpret_cast(ptr); + pointer_type operator&() const { return reinterpret_cast(ptr); } + operator reference_type() const { + if constexpr (!sycl::detail::is_unbounded_array_v) { + return *(this->operator&()); + } else { + return this->operator&(); + } } template >> From b94f7c94574ff22d43a42b8f1130a9c8a614752e Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Tue, 20 Aug 2024 07:22:42 -0700 Subject: [PATCH 005/107] Implement work group memory --- .../sycl/ext/oneapi/experimental/work_group_memory.hpp | 4 ++-- sycl/include/sycl/sycl.hpp | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp index 0a5fc480a6f62..2fe160601144d 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp @@ -43,7 +43,7 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory private: using reference_type = std::conditional_t, - std::decay_t, DataT &>; + std::decay_t &, DataT &>; using pointer_type = std::conditional_t, std::decay_t, DataT *>; @@ -74,7 +74,7 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory if constexpr (!sycl::detail::is_unbounded_array_v) { return *(this->operator&()); } else { - return this->operator&(); + return std::reference_wrapper(this->operator&()); } } template #include #include +#include #include #include #include From 0d6d694dc0f28882560995ee664fa3ee124d53e5 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Wed, 21 Aug 2024 19:39:11 -0700 Subject: [PATCH 006/107] Remove debug dumps --- clang/lib/Sema/SemaSYCL.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp 
index 79c57356bdb82..da15f56614e47 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -4476,7 +4476,6 @@ class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler { uint64_t Size; Size = SemaSYCLRef.getASTContext().getTypeSizeInChars(ParamTy).getQuantity(); - ParamTy->dump(); Header.addParamDesc(Kind, static_cast(Size), static_cast(CurOffset + OffsetAdj)); } From 9f2973a51a6dc3de8751328ddc9f9c82c5c0d7cd Mon Sep 17 00:00:00 2001 From: Lorenc Bushi <113361374+lbushi25@users.noreply.github.com> Date: Wed, 21 Aug 2024 22:56:11 -0400 Subject: [PATCH 007/107] Update work_group_memory.hpp --- .../sycl/ext/oneapi/experimental/work_group_memory.hpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp index 2fe160601144d..76e333daeb963 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp @@ -41,9 +41,12 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory using value_type = std::remove_all_extents_t; private: + // if DataT is an unbounded array, use as reference_type and + // pointer_type the decayed type of DataT. 
This is because + // unbounded arrays do not work well with the IR<->SPIRV translator using reference_type = std::conditional_t, - std::decay_t &, DataT &>; + std::decay_t, DataT &>; using pointer_type = std::conditional_t, std::decay_t, DataT *>; @@ -74,7 +77,7 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory if constexpr (!sycl::detail::is_unbounded_array_v) { return *(this->operator&()); } else { - return std::reference_wrapper(this->operator&()); + return this->operator&(); } } template Date: Thu, 22 Aug 2024 05:27:01 +0200 Subject: [PATCH 008/107] Remove include of deleted header file --- sycl/include/sycl/sycl.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/include/sycl/sycl.hpp b/sycl/include/sycl/sycl.hpp index 283d6ee5ca9ee..fc5aff80487f8 100644 --- a/sycl/include/sycl/sycl.hpp +++ b/sycl/include/sycl/sycl.hpp @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include From 4234022582c7d3162ca73ed0d200967f20f689fa Mon Sep 17 00:00:00 2001 From: "Bushi, Lorenc" Date: Fri, 23 Aug 2024 01:31:32 +0200 Subject: [PATCH 009/107] Fix SPIRV compilation errors --- sycl-jit/common/include/Kernel.h | 1 + .../oneapi/experimental/work_group_memory.hpp | 39 +++++++------------ sycl/include/sycl/handler.hpp | 21 +++++----- sycl/source/detail/jit_compiler.cpp | 2 + sycl/source/handler.cpp | 3 +- 5 files changed, 32 insertions(+), 34 deletions(-) diff --git a/sycl-jit/common/include/Kernel.h b/sycl-jit/common/include/Kernel.h index 4b4bb35b9bbf9..2959b2e1b9871 100644 --- a/sycl-jit/common/include/Kernel.h +++ b/sycl-jit/common/include/Kernel.h @@ -58,6 +58,7 @@ enum class ParameterKind : uint32_t { Pointer = 3, SpecConstBuffer = 4, Stream = 5, + WorkGroupMemory = 6, Invalid = 0xF, }; diff --git a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp index 76e333daeb963..0951ccebaedd5 100644 --- 
a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp @@ -26,33 +26,28 @@ class work_group_memory_impl { work_group_memory_impl(const work_group_memory_impl &rhs) = default; work_group_memory_impl & operator=(const work_group_memory_impl &rhs) = default; - work_group_memory_impl(size_t sz) : size{sz} {} - size_t size; + work_group_memory_impl(size_t wgm_size, size_t buffer_size) : wgm_size{ wgm_size }, buffer_size{ buffer_size } {} + size_t wgm_size; + size_t buffer_size; }; +inline size_t getWorkGroupMemoryOwnSize(detail::work_group_memory_impl * wgm) { + return wgm->wgm_size; +} +inline size_t getWorkGroupMemoryBufferSize(detail::work_group_memory_impl * wgm) { + return wgm->buffer_size; +} } // namespace detail namespace ext::oneapi::experimental { - template class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory : sycl::detail::work_group_memory_impl { public: using value_type = std::remove_all_extents_t; - private: - // if DataT is an unbounded array, use as reference_type and - // pointer_type the decayed type of DataT. 
This is because - // unbounded arrays do not work well with the IR<->SPIRV translator - using reference_type = - std::conditional_t, - std::decay_t, DataT &>; - using pointer_type = - std::conditional_t, - std::decay_t, DataT *>; using decoratedPtr = typename sycl::detail::DecoratedType< - value_type, access::address_space::local_space>::type *; - + DataT, access::address_space::local_space>::type *; public: work_group_memory() = default; work_group_memory(const work_group_memory &rhs) = default; @@ -60,11 +55,11 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory template >> work_group_memory(handler &) - : sycl::detail::work_group_memory_impl(sizeof(DataT)) {} + : sycl::detail::work_group_memory_impl(sizeof(work_group_memory), sizeof(DataT)) {} template >> work_group_memory(size_t num, handler &cgh) - : sycl::detail::work_group_memory_impl( + : sycl::detail::work_group_memory_impl(sizeof(work_group_memory), num * sizeof(std::remove_extent_t)) {} template multi_ptr @@ -72,13 +67,9 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory return sycl::address_space_cast(ptr); } - pointer_type operator&() const { return reinterpret_cast(ptr); } - operator reference_type() const { - if constexpr (!sycl::detail::is_unbounded_array_v) { - return *(this->operator&()); - } else { - return this->operator&(); - } + DataT * operator&() const { return ptr; } + operator DataT&() const { + return *(this->operator&()); } template >> diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 3f783dc35bcfc..b33e7682c61e4 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include @@ -161,7 +160,11 @@ template +class work_group_memory; + struct image_descriptor; } // namespace ext::oneapi::experimental @@ -169,7 +172,9 @@ namespace ext::oneapi::experimental::detail { class graph_impl; } // namespace 
ext::oneapi::experimental::detail namespace detail { - +class work_group_memory_impl; +size_t getWorkGroupMemoryOwnSize(work_group_memory_impl *); +size_t getWorkGroupMemoryBufferSize(work_group_memory_impl *); class handler_impl; class kernel_impl; class queue_impl; @@ -688,9 +693,9 @@ class __SYCL_EXPORT handler { setLocalAccessorArgHelper(ArgIndex, Arg); #endif } - template - void setArgHelper(int ArgIndex, ext::oneapi::experimental::work_group_memory &&Arg) { - addArg(detail::kernel_param_kind_t::kind_work_group_memory, &Arg, sizeof(Arg), ArgIndex); + template + void setArgHelper(int ArgIndex, ext::oneapi::experimental::work_group_memory &&Arg) { + addArg(detail::kernel_param_kind_t::kind_work_group_memory, &Arg, detail::getWorkGroupMemoryOwnSize(static_cast(Arg)), ArgIndex); } // setArgHelper for non local accessor argument. template - void set_arg(int ArgIndex, ext::oneapi::experimental::work_group_memory &&Arg) { + template + void set_arg(int ArgIndex, ext::oneapi::experimental::work_group_memory &&Arg) { setArgHelper(ArgIndex, std::move(Arg)); } // set_arg for graph dynamic_parameters diff --git a/sycl/source/detail/jit_compiler.cpp b/sycl/source/detail/jit_compiler.cpp index 2359293851c55..4ff2727a9268c 100644 --- a/sycl/source/detail/jit_compiler.cpp +++ b/sycl/source/detail/jit_compiler.cpp @@ -133,6 +133,8 @@ translateArgType(kernel_param_kind_t Kind) { return PK::SpecConstBuffer; case kind::kind_stream: return PK::Stream; + case kind::kind_work_group_memory: + return PK::WorkGroupMemory; case kind::kind_invalid: return PK::Invalid; } diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 21a99ff475d62..58e2575550529 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -33,6 +33,7 @@ #include #include +#include namespace sycl { inline namespace _V1 { @@ -790,7 +791,7 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, break; } case kernel_param_kind_t::kind_work_group_memory: { - 
addArg(kernel_param_kind_t::kind_std_layout, nullptr, static_cast(Ptr)->size, + addArg(kernel_param_kind_t::kind_std_layout, nullptr, detail::getWorkGroupMemoryBufferSize(static_cast(Ptr)), Index + IndexShift); break; } From ae5eb7e628093070d618cea574b64b9cb10ffcdb Mon Sep 17 00:00:00 2001 From: "Bushi, Lorenc" Date: Fri, 23 Aug 2024 05:05:16 +0200 Subject: [PATCH 010/107] Remove accidental change --- sycl/include/sycl/handler.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index b33e7682c61e4..572016edcbe43 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -923,7 +923,7 @@ class __SYCL_EXPORT handler { // a static assert is made to fail for incompatible kernel lambdas. static_assert( !KernelHasName || sizeof(KernelFunc) == KI::getKernelSize(), - "Unexpected kernel lambda size. Expected This can be caused by an " + "Unexpected kernel lambda size. This can be caused by an " "external host compiler producing a lambda with an " "unexpected layout. This is a limitation of the compiler." 
"In many cases the difference is related to capturing constexpr " From 8ce0280028c1d3385dfbcff0edd5a02d0273083e Mon Sep 17 00:00:00 2001 From: "Bushi, Lorenc" Date: Fri, 23 Aug 2024 05:29:14 +0200 Subject: [PATCH 011/107] Formatting changes --- clang/lib/Sema/SemaSYCL.cpp | 62 +++++++++++++++++++---------------- sycl/include/sycl/handler.hpp | 3 ++ sycl/source/handler.cpp | 10 +++--- 3 files changed, 42 insertions(+), 33 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index c8aa9e597b87b..3102cde4b0378 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -393,7 +393,8 @@ bool SemaSYCL::isDeclAllowedInSYCLDeviceCode(const Decl *D) { return true; const DeclContext *DC = FD->getDeclContext(); - if (II && II->isStr("__spirv_ocl_printf") && !FD->isDefined() && + if (II && II->isStr("__spirv_ocl_printf") && + !FD->isDefined() && FD->getLanguageLinkage() == CXXLanguageLinkage && DC->getEnclosingNamespaceContext()->isTranslationUnit()) return true; @@ -644,7 +645,9 @@ class DiagDeviceFunction : public RecursiveASTVisitor { // Make sure we skip the condition of the case, since that is a constant // expression. - bool TraverseCaseStmt(CaseStmt *S) { return TraverseStmt(S->getSubStmt()); } + bool TraverseCaseStmt(CaseStmt *S) { + return TraverseStmt(S->getSubStmt()); + } // Skip checking the size expr, since a constant array type loc's size expr is // a constant expression. 
@@ -909,8 +912,7 @@ class SingleDeviceFunctionTracker { !KernelBody->hasAttr() && !KernelBody->hasAttr()) { KernelBody->addAttr(AlwaysInlineAttr::CreateImplicit( - KernelBody->getASTContext(), {}, - AlwaysInlineAttr::Keyword_forceinline)); + KernelBody->getASTContext(), {}, AlwaysInlineAttr::Keyword_forceinline)); } } @@ -1002,7 +1004,8 @@ class MarkWIScopeFnVisitor : public RecursiveASTVisitor { // not a member of sycl::group - continue search return true; auto Name = Callee->getName(); - if (Name != "wait_for" || Callee->hasAttr()) + if (Name != "wait_for" || + Callee->hasAttr()) return true; // it is a call to sycl::group::wait_for - mark the callee Callee->addAttr( @@ -1238,7 +1241,7 @@ class KernelObjVisitor { template void VisitUnionImpl(const CXXRecordDecl *Owner, ParentTy &Parent, - const CXXRecordDecl *Wrapper, HandlerTys &...Handlers) { + const CXXRecordDecl *Wrapper, HandlerTys &... Handlers) { (void)std::initializer_list{ (Handlers.enterUnion(Owner, Parent), 0)...}; VisitRecordHelper(Wrapper, Wrapper->fields(), Handlers...); @@ -1248,13 +1251,13 @@ class KernelObjVisitor { // These enable handler execution only when previous Handlers succeed. template - bool handleField(FieldDecl *FD, QualType FDTy, Tn &&...tn) { + bool handleField(FieldDecl *FD, QualType FDTy, Tn &&... tn) { bool result = true; (void)std::initializer_list{(result = result && tn(FD, FDTy), 0)...}; return result; } template - bool handleField(const CXXBaseSpecifier &BD, QualType BDTy, Tn &&...tn) { + bool handleField(const CXXBaseSpecifier &BD, QualType BDTy, Tn &&... tn) { bool result = true; std::initializer_list{(result = result && tn(BD, BDTy), 0)...}; return result; @@ -1299,7 +1302,7 @@ class KernelObjVisitor { template void visitComplexRecord(const CXXRecordDecl *Owner, ParentTy &Parent, const CXXRecordDecl *Wrapper, QualType RecordTy, - HandlerTys &...Handlers) { + HandlerTys &... 
Handlers) { (void)std::initializer_list{ (Handlers.enterStruct(Owner, Parent, RecordTy), 0)...}; VisitRecordHelper(Wrapper, Wrapper->bases(), Handlers...); @@ -1311,7 +1314,7 @@ class KernelObjVisitor { template void visitSimpleRecord(const CXXRecordDecl *Owner, ParentTy &Parent, const CXXRecordDecl *Wrapper, QualType RecordTy, - HandlerTys &...Handlers) { + HandlerTys &... Handlers) { (void)std::initializer_list{ (Handlers.handleNonDecompStruct(Owner, Parent, RecordTy), 0)...}; } @@ -1319,16 +1322,16 @@ class KernelObjVisitor { template void visitRecord(const CXXRecordDecl *Owner, ParentTy &Parent, const CXXRecordDecl *Wrapper, QualType RecordTy, - HandlerTys &...Handlers); + HandlerTys &... Handlers); template void VisitUnion(const CXXRecordDecl *Owner, ParentTy &Parent, - const CXXRecordDecl *Wrapper, HandlerTys &...Handlers); + const CXXRecordDecl *Wrapper, HandlerTys &... Handlers); template void VisitRecordHelper(const CXXRecordDecl *Owner, clang::CXXRecordDecl::base_class_const_range Range, - HandlerTys &...Handlers) { + HandlerTys &... Handlers) { for (const auto &Base : Range) { QualType BaseTy = Base.getType(); // Handle accessor class as base @@ -1345,14 +1348,14 @@ class KernelObjVisitor { template void VisitRecordHelper(const CXXRecordDecl *Owner, RecordDecl::field_range Range, - HandlerTys &...Handlers) { + HandlerTys &... Handlers) { VisitRecordFields(Owner, Handlers...); } template void visitArrayElementImpl(const CXXRecordDecl *Owner, FieldDecl *ArrayField, QualType ElementTy, uint64_t Index, - HandlerTys &...Handlers) { + HandlerTys &... Handlers) { (void)std::initializer_list{ (Handlers.nextElement(ElementTy, Index), 0)...}; visitField(Owner, ArrayField, ElementTy, Handlers...); @@ -1360,24 +1363,24 @@ class KernelObjVisitor { template void visitFirstArrayElement(const CXXRecordDecl *Owner, FieldDecl *ArrayField, - QualType ElementTy, HandlerTys &...Handlers) { + QualType ElementTy, HandlerTys &... 
Handlers) { visitArrayElementImpl(Owner, ArrayField, ElementTy, 0, Handlers...); } template void visitNthArrayElement(const CXXRecordDecl *Owner, FieldDecl *ArrayField, QualType ElementTy, uint64_t Index, - HandlerTys &...Handlers); + HandlerTys &... Handlers); template void visitSimpleArray(const CXXRecordDecl *Owner, FieldDecl *Field, - QualType ArrayTy, HandlerTys &...Handlers) { + QualType ArrayTy, HandlerTys &... Handlers) { (void)std::initializer_list{ (Handlers.handleSimpleArrayType(Field, ArrayTy), 0)...}; } template void visitComplexArray(const CXXRecordDecl *Owner, FieldDecl *Field, - QualType ArrayTy, HandlerTys &...Handlers) { + QualType ArrayTy, HandlerTys &... Handlers) { // Array workflow is: // handleArrayType // enterArray @@ -1409,7 +1412,7 @@ class KernelObjVisitor { template void visitField(const CXXRecordDecl *Owner, FieldDecl *Field, - QualType FieldTy, HandlerTys &...Handlers) { + QualType FieldTy, HandlerTys &... Handlers) { if (isSyclSpecialType(FieldTy, SemaSYCLRef)) KF_FOR_EACH(handleSyclSpecialType, Field, FieldTy); else if (FieldTy->isStructureOrClassType()) { @@ -1481,14 +1484,14 @@ class KernelObjVisitor { template void VisitRecordBases(const CXXRecordDecl *KernelFunctor, - HandlerTys &...Handlers) { + HandlerTys &... Handlers) { VisitRecordHelper(KernelFunctor, KernelFunctor->bases(), Handlers...); } // A visitor function that dispatches to functions as defined in // SyclKernelFieldHandler for the purposes of kernel generation. template - void VisitRecordFields(const CXXRecordDecl *Owner, HandlerTys &...Handlers) { + void VisitRecordFields(const CXXRecordDecl *Owner, HandlerTys &... Handlers) { for (const auto Field : Owner->fields()) visitField(Owner, Field, Field->getType(), Handlers...); } @@ -1691,7 +1694,7 @@ template struct AllTrue { template void KernelObjVisitor::VisitUnion(const CXXRecordDecl *Owner, ParentTy &Parent, const CXXRecordDecl *Wrapper, - Handlers &...handlers) { + Handlers &... 
handlers) { // Don't continue descending if none of the handlers 'care'. This could be 'if // constexpr' starting in C++17. Until then, we have to count on the // optimizer to realize "if (false)" is a dead branch. @@ -1705,7 +1708,7 @@ template void KernelObjVisitor::visitNthArrayElement(const CXXRecordDecl *Owner, FieldDecl *ArrayField, QualType ElementTy, uint64_t Index, - Handlers &...handlers) { + Handlers &... handlers) { // Don't continue descending if none of the handlers 'care'. This could be 'if // constexpr' starting in C++17. Until then, we have to count on the // optimizer to realize "if (false)" is a dead branch. @@ -1719,7 +1722,8 @@ void KernelObjVisitor::visitNthArrayElement(const CXXRecordDecl *Owner, template void KernelObjVisitor::visitRecord(const CXXRecordDecl *Owner, ParentTy &Parent, const CXXRecordDecl *Wrapper, - QualType RecordTy, HandlerTys &...Handlers) { + QualType RecordTy, + HandlerTys &...Handlers) { RecordDecl *RD = RecordTy->getAsRecordDecl(); assert(RD && "should not be null."); if (RD->hasAttr()) { @@ -1764,7 +1768,7 @@ void KernelObjVisitor::visitRecord(const CXXRecordDecl *Owner, ParentTy &Parent, template void KernelObjVisitor::visitArray(const CXXRecordDecl *Owner, FieldDecl *Field, - QualType ArrayTy, HandlerTys &...Handlers) { + QualType ArrayTy, HandlerTys &... Handlers) { if (Field->hasAttr()) { visitComplexArray(Owner, Field, ArrayTy, Handlers...); @@ -6156,6 +6160,7 @@ void SYCLIntegrationHeader::emit(raw_ostream &O) { O << "\n"; } + O << "// names of all kernels defined in the corresponding source\n"; O << "static constexpr\n"; O << "const char* const kernel_names[] = {\n"; @@ -6637,7 +6642,7 @@ bool SYCLIntegrationFooter::emit(raw_ostream &OS) { for (const VarDecl *VD : GlobalVars) { VD = VD->getCanonicalDecl(); - // Skip if this isn't a SpecIdType, DeviceGlobal, or HostPipe. This + // Skip if this isn't a SpecIdType, DeviceGlobal, or HostPipe. This // can happen if it was a deduced type. 
if (!SemaSYCL::isSyclType(VD->getType(), SYCLTypeAttr::specialization_id) && !SemaSYCL::isSyclType(VD->getType(), SYCLTypeAttr::host_pipe) && @@ -6682,7 +6687,8 @@ bool SYCLIntegrationFooter::emit(raw_ostream &OS) { VD->getNameForDiagnostic(HostPipesOS, Policy, true); } HostPipesOS << ", \""; - HostPipesOS << SYCLUniqueStableIdExpr::ComputeName(S.getASTContext(), VD); + HostPipesOS << SYCLUniqueStableIdExpr::ComputeName(S.getASTContext(), + VD); HostPipesOS << "\");\n"; } else { EmittedFirstSpecConstant = true; diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 572016edcbe43..ea3ce36aceaed 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -172,6 +172,7 @@ namespace ext::oneapi::experimental::detail { class graph_impl; } // namespace ext::oneapi::experimental::detail namespace detail { + class work_group_memory_impl; size_t getWorkGroupMemoryOwnSize(work_group_memory_impl *); size_t getWorkGroupMemoryBufferSize(work_group_memory_impl *); @@ -693,6 +694,7 @@ class __SYCL_EXPORT handler { setLocalAccessorArgHelper(ArgIndex, Arg); #endif } + template void setArgHelper(int ArgIndex, ext::oneapi::experimental::work_group_memory &&Arg) { addArg(detail::kernel_param_kind_t::kind_work_group_memory, &Arg, detail::getWorkGroupMemoryOwnSize(static_cast(Arg)), ArgIndex); @@ -918,6 +920,7 @@ class __SYCL_EXPORT handler { constexpr bool KernelHasName = KI::getName() != nullptr && KI::getName()[0] != '\0'; + // Some host compilers may have different captures from Clang. Currently // there is no stable way of handling this when extracting the captures, so // a static assert is made to fail for incompatible kernel lambdas. 
diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 58e2575550529..2d3bedfda2dea 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "sycl/detail/helpers.hpp" #include "ur_api.h" +#include "sycl/detail/helpers.hpp" #include #include @@ -1133,7 +1133,7 @@ void handler::ext_oneapi_copy( sycl_ext_oneapi_bindless_images>(); Desc.verify(); - MSrcPtr = reinterpret_cast(Src.raw_handle); + MSrcPtr = reinterpret_cast(Src.raw_handle); MDstPtr = Dest; ur_image_desc_t UrDesc = {}; @@ -1186,8 +1186,8 @@ void handler::ext_oneapi_copy( sycl_ext_oneapi_bindless_images>(); ImageDesc.verify(); - MSrcPtr = reinterpret_cast(Src.raw_handle); - MDstPtr = reinterpret_cast(Dest.raw_handle); + MSrcPtr = reinterpret_cast(Src.raw_handle); + MDstPtr = reinterpret_cast(Dest.raw_handle); ur_image_desc_t UrDesc = {}; UrDesc.stype = UR_STRUCTURE_TYPE_IMAGE_DESC; @@ -1241,7 +1241,7 @@ void handler::ext_oneapi_copy( sycl_ext_oneapi_bindless_images>(); SrcImgDesc.verify(); - MSrcPtr = reinterpret_cast(Src.raw_handle); + MSrcPtr = reinterpret_cast(Src.raw_handle); MDstPtr = Dest; ur_image_desc_t UrDesc = {}; From 4ee31a58be2e22d81a5ace13dd7bdd7b2e43dd55 Mon Sep 17 00:00:00 2001 From: "Bushi, Lorenc" Date: Fri, 23 Aug 2024 05:37:30 +0200 Subject: [PATCH 012/107] Formatting changes --- clang/lib/Sema/SemaSYCL.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 3102cde4b0378..51004626e0a7a 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -393,8 +393,8 @@ bool SemaSYCL::isDeclAllowedInSYCLDeviceCode(const Decl *D) { return true; const DeclContext *DC = FD->getDeclContext(); - if (II && II->isStr("__spirv_ocl_printf") && - !FD->isDefined() && + if (II && II->isStr("__spirv_ocl_printf") && + !FD->isDefined() && FD->getLanguageLinkage() == 
CXXLanguageLinkage && DC->getEnclosingNamespaceContext()->isTranslationUnit()) return true; @@ -645,7 +645,7 @@ class DiagDeviceFunction : public RecursiveASTVisitor { // Make sure we skip the condition of the case, since that is a constant // expression. - bool TraverseCaseStmt(CaseStmt *S) { + bool TraverseCaseStmt(CaseStmt *S) { return TraverseStmt(S->getSubStmt()); } @@ -1004,8 +1004,8 @@ class MarkWIScopeFnVisitor : public RecursiveASTVisitor { // not a member of sycl::group - continue search return true; auto Name = Callee->getName(); - if (Name != "wait_for" || - Callee->hasAttr()) + if (Name != "wait_for" || + Callee->hasAttr()) return true; // it is a call to sycl::group::wait_for - mark the callee Callee->addAttr( @@ -1723,7 +1723,7 @@ template void KernelObjVisitor::visitRecord(const CXXRecordDecl *Owner, ParentTy &Parent, const CXXRecordDecl *Wrapper, QualType RecordTy, - HandlerTys &...Handlers) { + HandlerTys &... Handlers) { RecordDecl *RD = RecordTy->getAsRecordDecl(); assert(RD && "should not be null."); if (RD->hasAttr()) { @@ -6688,7 +6688,7 @@ bool SYCLIntegrationFooter::emit(raw_ostream &OS) { } HostPipesOS << ", \""; HostPipesOS << SYCLUniqueStableIdExpr::ComputeName(S.getASTContext(), - VD); + VD); HostPipesOS << "\");\n"; } else { EmittedFirstSpecConstant = true; From 7b1b90bcd5451c26058b4eb7f094d7ca69434838 Mon Sep 17 00:00:00 2001 From: "Bushi, Lorenc" Date: Fri, 23 Aug 2024 05:39:04 +0200 Subject: [PATCH 013/107] Put the work group memory doc to supported --- .../sycl_ext_oneapi_work_group_memory.asciidoc | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename sycl/doc/extensions/{proposed => supported}/sycl_ext_oneapi_work_group_memory.asciidoc (100%) diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_work_group_memory.asciidoc b/sycl/doc/extensions/supported/sycl_ext_oneapi_work_group_memory.asciidoc similarity index 100% rename from sycl/doc/extensions/proposed/sycl_ext_oneapi_work_group_memory.asciidoc rename to 
sycl/doc/extensions/supported/sycl_ext_oneapi_work_group_memory.asciidoc From cf7476e036d4f1bc3e37282b36ed0561f768836f Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Thu, 22 Aug 2024 20:54:21 -0700 Subject: [PATCH 014/107] More formatting changes --- clang/lib/Sema/SemaSYCL.cpp | 3 ++- .../sycl/ext/oneapi/experimental/test.cpp | 19 ++++++++++++++ .../oneapi/experimental/work_group_memory.hpp | 26 +++++++++++-------- sycl/include/sycl/handler.hpp | 25 +++++++++++------- sycl/source/handler.cpp | 2 +- 5 files changed, 53 insertions(+), 22 deletions(-) create mode 100644 sycl/include/sycl/ext/oneapi/experimental/test.cpp diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 51004626e0a7a..7d9094b12e4c9 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -4519,7 +4519,8 @@ class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler { } else if (SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::stream)) { addParam(FD, FieldTy, SYCLIntegrationHeader::kind_stream); } else if (SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::work_group_memory)) { - addParam(FieldTy, SYCLIntegrationHeader::kind_work_group_memory, offsetOf(FD, FieldTy)); + addParam(FieldTy, SYCLIntegrationHeader::kind_work_group_memory, + offsetOf(FD, FieldTy)); } else if (SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::sampler) || SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::annotated_ptr) || SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::annotated_arg)) { diff --git a/sycl/include/sycl/ext/oneapi/experimental/test.cpp b/sycl/include/sycl/ext/oneapi/experimental/test.cpp new file mode 100644 index 0000000000000..f2143f221be9f --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/experimental/test.cpp @@ -0,0 +1,19 @@ +#include + +using namespace sycl; + +template +struct ptr + +int main() { + queue q; + int magic; + buffer b{&magic, 1}; + q.submit([&](handler& h) { + sycl::accessor acc{b, h}; + local_accessor acc{h}; + h.parallel_for(nd_range<2>({2, 2}, {1, 1}), 
[=](nd_item<2> it) { + acc[it.get_local_id(0)][it.get_local_id(1)] = 42; +}); +}); +} diff --git a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp index 0951ccebaedd5..c6999cb3c510a 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp @@ -26,16 +26,18 @@ class work_group_memory_impl { work_group_memory_impl(const work_group_memory_impl &rhs) = default; work_group_memory_impl & operator=(const work_group_memory_impl &rhs) = default; - work_group_memory_impl(size_t wgm_size, size_t buffer_size) : wgm_size{ wgm_size }, buffer_size{ buffer_size } {} + work_group_memory_impl(size_t wgm_size, size_t buffer_size) + : wgm_size{wgm_size}, buffer_size{buffer_size} {} size_t wgm_size; size_t buffer_size; }; -inline size_t getWorkGroupMemoryOwnSize(detail::work_group_memory_impl * wgm) { - return wgm->wgm_size; +inline size_t getWorkGroupMemoryOwnSize(detail::work_group_memory_impl *wgm) { + return wgm->wgm_size; } -inline size_t getWorkGroupMemoryBufferSize(detail::work_group_memory_impl * wgm) { - return wgm->buffer_size; +inline size_t +getWorkGroupMemoryBufferSize(detail::work_group_memory_impl *wgm) { + return wgm->buffer_size; } } // namespace detail @@ -45,9 +47,11 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory : sycl::detail::work_group_memory_impl { public: using value_type = std::remove_all_extents_t; + private: using decoratedPtr = typename sycl::detail::DecoratedType< DataT, access::address_space::local_space>::type *; + public: work_group_memory() = default; work_group_memory(const work_group_memory &rhs) = default; @@ -55,11 +59,13 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory template >> work_group_memory(handler &) - : sycl::detail::work_group_memory_impl(sizeof(work_group_memory), sizeof(DataT)) {} + : 
sycl::detail::work_group_memory_impl(sizeof(work_group_memory), + sizeof(DataT)) {} template >> work_group_memory(size_t num, handler &cgh) - : sycl::detail::work_group_memory_impl(sizeof(work_group_memory), + : sycl::detail::work_group_memory_impl( + sizeof(work_group_memory), num * sizeof(std::remove_extent_t)) {} template multi_ptr @@ -67,10 +73,8 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory return sycl::address_space_cast(ptr); } - DataT * operator&() const { return ptr; } - operator DataT&() const { - return *(this->operator&()); - } + DataT *operator&() const { return ptr; } + operator DataT &() const { return *(this->operator&()); } template >> const work_group_memory &operator=(const DataT &value) const { diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index ea3ce36aceaed..b4abd989db921 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -160,10 +160,8 @@ template -class work_group_memory; +template class work_group_memory; struct image_descriptor; } // namespace ext::oneapi::experimental @@ -696,9 +694,13 @@ class __SYCL_EXPORT handler { } template - void setArgHelper(int ArgIndex, ext::oneapi::experimental::work_group_memory &&Arg) { - addArg(detail::kernel_param_kind_t::kind_work_group_memory, &Arg, detail::getWorkGroupMemoryOwnSize(static_cast(Arg)), ArgIndex); -} + void setArgHelper(int ArgIndex, ext::oneapi::experimental::work_group_memory< + DataT, PropertyListT> &&Arg) { + addArg(detail::kernel_param_kind_t::kind_work_group_memory, &Arg, + detail::getWorkGroupMemoryOwnSize( + static_cast(Arg)), + ArgIndex); + } // setArgHelper for non local accessor argument. 
template @@ -2032,10 +2034,15 @@ class __SYCL_EXPORT handler { setArgHelper(ArgIndex, std::move(Arg)); } - template - void set_arg(int ArgIndex, ext::oneapi::experimental::work_group_memory &&Arg) { + template + void + set_arg(int ArgIndex, + ext::oneapi::experimental::work_group_memory + &&Arg) { setArgHelper(ArgIndex, std::move(Arg)); -} + } + // set_arg for graph dynamic_parameters template void set_arg(int argIndex, diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 2d3bedfda2dea..43ebeb0bc4a78 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -32,8 +32,8 @@ #include #include -#include #include +#include namespace sycl { inline namespace _V1 { From 50c0954d5b914f2b1ca1568dc3d3bf850e71b27a Mon Sep 17 00:00:00 2001 From: Lorenc Bushi <113361374+lbushi25@users.noreply.github.com> Date: Fri, 23 Aug 2024 00:00:11 -0400 Subject: [PATCH 015/107] Delete sycl/include/sycl/ext/oneapi/experimental/test.cpp --- .../sycl/ext/oneapi/experimental/test.cpp | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 sycl/include/sycl/ext/oneapi/experimental/test.cpp diff --git a/sycl/include/sycl/ext/oneapi/experimental/test.cpp b/sycl/include/sycl/ext/oneapi/experimental/test.cpp deleted file mode 100644 index f2143f221be9f..0000000000000 --- a/sycl/include/sycl/ext/oneapi/experimental/test.cpp +++ /dev/null @@ -1,19 +0,0 @@ -#include - -using namespace sycl; - -template -struct ptr - -int main() { - queue q; - int magic; - buffer b{&magic, 1}; - q.submit([&](handler& h) { - sycl::accessor acc{b, h}; - local_accessor acc{h}; - h.parallel_for(nd_range<2>({2, 2}, {1, 1}), [=](nd_item<2> it) { - acc[it.get_local_id(0)][it.get_local_id(1)] = 42; -}); -}); -} From 44811b85b9f62be8bb0887ad55148a8d26639714 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Thu, 22 Aug 2024 21:06:52 -0700 Subject: [PATCH 016/107] Yet more formatting changes --- sycl/source/handler.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff 
--git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 43ebeb0bc4a78..0ff2c785cb8f7 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -791,7 +791,9 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, break; } case kernel_param_kind_t::kind_work_group_memory: { - addArg(kernel_param_kind_t::kind_std_layout, nullptr, detail::getWorkGroupMemoryBufferSize(static_cast(Ptr)), + addArg(kernel_param_kind_t::kind_std_layout, nullptr, + detail::getWorkGroupMemoryBufferSize( + static_cast(Ptr)), Index + IndexShift); break; } From d343a2e9cc9154efc5d432f1faba86461806f238 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi <113361374+lbushi25@users.noreply.github.com> Date: Fri, 23 Aug 2024 00:17:48 -0400 Subject: [PATCH 017/107] Fix warnings on Linux --- sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp index c6999cb3c510a..05c8dedfe8265 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp @@ -63,7 +63,7 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory sizeof(DataT)) {} template >> - work_group_memory(size_t num, handler &cgh) + work_group_memory(size_t num, handler &) : sycl::detail::work_group_memory_impl( sizeof(work_group_memory), num * sizeof(std::remove_extent_t)) {} From 3f1bc3075b1f7bb684802b1455da26377a08b635 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi <113361374+lbushi25@users.noreply.github.com> Date: Fri, 23 Aug 2024 00:20:50 -0400 Subject: [PATCH 018/107] Remove unnecessary forward declaration from handler.hpp --- sycl/include/sycl/handler.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index b4abd989db921..8a14e8e8d9a7d 
100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -173,7 +173,6 @@ namespace detail { class work_group_memory_impl; size_t getWorkGroupMemoryOwnSize(work_group_memory_impl *); -size_t getWorkGroupMemoryBufferSize(work_group_memory_impl *); class handler_impl; class kernel_impl; class queue_impl; From 103e2336ad3bbe426717cf26057c321e9be4faba Mon Sep 17 00:00:00 2001 From: Lorenc Bushi <113361374+lbushi25@users.noreply.github.com> Date: Fri, 23 Aug 2024 00:25:12 -0400 Subject: [PATCH 019/107] Remove rvalue references in favor of const lvalue references --- sycl/include/sycl/handler.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 8a14e8e8d9a7d..f0800c47bb6ab 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -693,8 +693,8 @@ class __SYCL_EXPORT handler { } template - void setArgHelper(int ArgIndex, ext::oneapi::experimental::work_group_memory< - DataT, PropertyListT> &&Arg) { + void setArgHelper(int ArgIndex, const ext::oneapi::experimental::work_group_memory< + DataT, PropertyListT> &Arg) { addArg(detail::kernel_param_kind_t::kind_work_group_memory, &Arg, detail::getWorkGroupMemoryOwnSize( static_cast(Arg)), @@ -2037,9 +2037,9 @@ class __SYCL_EXPORT handler { ext::oneapi::experimental::empty_properties_t> void set_arg(int ArgIndex, - ext::oneapi::experimental::work_group_memory - &&Arg) { - setArgHelper(ArgIndex, std::move(Arg)); + const ext::oneapi::experimental::work_group_memory + &Arg) { + setArgHelper(ArgIndex, Arg)); } // set_arg for graph dynamic_parameters From bfa5830942d28ea59ca47750dc05ba2ed73cec9d Mon Sep 17 00:00:00 2001 From: Lorenc Bushi <113361374+lbushi25@users.noreply.github.com> Date: Fri, 23 Aug 2024 00:32:32 -0400 Subject: [PATCH 020/107] Fix syntax errors --- sycl/include/sycl/handler.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/sycl/handler.hpp 
b/sycl/include/sycl/handler.hpp index f0800c47bb6ab..bc10dc2358b02 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -2039,7 +2039,7 @@ class __SYCL_EXPORT handler { set_arg(int ArgIndex, const ext::oneapi::experimental::work_group_memory &Arg) { - setArgHelper(ArgIndex, Arg)); + setArgHelper(ArgIndex, Arg); } // set_arg for graph dynamic_parameters From 2031478ca886dfc280f84db5be1f29d61086a155 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Thu, 22 Aug 2024 21:37:27 -0700 Subject: [PATCH 021/107] Fix syntax errors --- sycl/include/sycl/handler.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index bc10dc2358b02..7b8639c8e4c2b 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -693,8 +693,10 @@ class __SYCL_EXPORT handler { } template - void setArgHelper(int ArgIndex, const ext::oneapi::experimental::work_group_memory< - DataT, PropertyListT> &Arg) { + void setArgHelper( + int ArgIndex, + const ext::oneapi::experimental::work_group_memory + &Arg) { addArg(detail::kernel_param_kind_t::kind_work_group_memory, &Arg, detail::getWorkGroupMemoryOwnSize( static_cast(Arg)), @@ -2035,10 +2037,8 @@ class __SYCL_EXPORT handler { template - void - set_arg(int ArgIndex, - const ext::oneapi::experimental::work_group_memory - &Arg) { + void set_arg(int ArgIndex, const ext::oneapi::experimental::work_group_memory< + DataT, PropertyListT> &Arg) { setArgHelper(ArgIndex, Arg); } From 76f0acce98e64276f37080cd56f073261b722389 Mon Sep 17 00:00:00 2001 From: "Bushi, Lorenc" Date: Fri, 23 Aug 2024 15:22:06 +0200 Subject: [PATCH 022/107] Don't explicitly make the work_group_memory class device-copyable as it already is --- .../oneapi/experimental/work_group_memory.hpp | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp 
b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp index 0951ccebaedd5..5dc69b9cf2690 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp @@ -9,6 +9,7 @@ #pragma once #include +#include namespace sycl { inline namespace _V1 { @@ -58,36 +59,31 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory : sycl::detail::work_group_memory_impl(sizeof(work_group_memory), sizeof(DataT)) {} template >> - work_group_memory(size_t num, handler &cgh) + work_group_memory(size_t num, handler &) : sycl::detail::work_group_memory_impl(sizeof(work_group_memory), num * sizeof(std::remove_extent_t)) {} template multi_ptr get_multi_ptr() const { return sycl::address_space_cast(ptr); + IsDecorated, value_type>(data_ptr); } - DataT * operator&() const { return ptr; } + DataT * operator&() const { return data_ptr; } operator DataT&() const { return *(this->operator&()); } template >> const work_group_memory &operator=(const DataT &value) const { - *ptr = value; + *data_ptr = value; return *this; } #ifdef __SYCL_DEVICE_ONLY__ - void __init(decoratedPtr ptr) { this->ptr = ptr; } + void __init(__OPENCL_LOCAL_AS__ DataT data) { this->data_ptr = &data; } #endif private: - decoratedPtr ptr; + decoratedPtr data_ptr; }; } // namespace ext::oneapi::experimental } // namespace _V1 - -template -struct is_device_copyable> - : std::true_type {}; - } // namespace sycl From 3513251fc89f338e9cbd46e07af8c16a4f502b10 Mon Sep 17 00:00:00 2001 From: "Bushi, Lorenc" Date: Fri, 23 Aug 2024 16:04:58 +0200 Subject: [PATCH 023/107] Remove some more unnecessary code --- .../sycl/ext/oneapi/experimental/work_group_memory.hpp | 6 +----- sycl/include/sycl/handler.hpp | 3 ++- sycl/source/handler.cpp | 3 +-- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp 
b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp index 95f096f7851e7..b50593c3ad96e 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp @@ -8,8 +8,8 @@ #pragma once -#include #include +#include namespace sycl { inline namespace _V1 { @@ -36,10 +36,6 @@ class work_group_memory_impl { inline size_t getWorkGroupMemoryOwnSize(detail::work_group_memory_impl *wgm) { return wgm->wgm_size; } -inline size_t -getWorkGroupMemoryBufferSize(detail::work_group_memory_impl *wgm) { - return wgm->buffer_size; -} } // namespace detail namespace ext::oneapi::experimental { diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 7b8639c8e4c2b..990c585f78fba 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -699,9 +699,10 @@ class __SYCL_EXPORT handler { &Arg) { addArg(detail::kernel_param_kind_t::kind_work_group_memory, &Arg, detail::getWorkGroupMemoryOwnSize( - static_cast(Arg)), + static_cast *>(Arg)), ArgIndex); } + // setArgHelper for non local accessor argument. 
template diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 0ff2c785cb8f7..0e8d927050690 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -792,8 +792,7 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, } case kernel_param_kind_t::kind_work_group_memory: { addArg(kernel_param_kind_t::kind_std_layout, nullptr, - detail::getWorkGroupMemoryBufferSize( - static_cast(Ptr)), + static_cast(Ptr)->buffer_size, Index + IndexShift); break; } From 2cec9976572123c4d4e4bd7673a246d407a8d2e9 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi <113361374+lbushi25@users.noreply.github.com> Date: Fri, 23 Aug 2024 10:06:41 -0400 Subject: [PATCH 024/107] Update work_group_memory.hpp --- .../sycl/ext/oneapi/experimental/work_group_memory.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp index b50593c3ad96e..5b0582286d31a 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp @@ -68,18 +68,18 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory multi_ptr get_multi_ptr() const { return sycl::address_space_cast(data_ptr); + IsDecorated, value_type>(ptr); } DataT *operator&() const { return ptr; } operator DataT &() const { return *(this->operator&()); } template >> const work_group_memory &operator=(const DataT &value) const { - *data_ptr = value; + *ptr = value; return *this; } #ifdef __SYCL_DEVICE_ONLY__ - void __init(__OPENCL_LOCAL_AS__ DataT data) { this->data_ptr = &data; } + void __init(decoratedPtr ptr) { this->ptr = ptr; } #endif private: decoratedPtr data_ptr; From 4c8b196dd753303fb8e8c7dde7b88c82b46aca8f Mon Sep 17 00:00:00 2001 From: Lorenc Bushi <113361374+lbushi25@users.noreply.github.com> Date: Fri, 23 Aug 2024 10:06:54 -0400 Subject: [PATCH 
025/107] Update work_group_memory.hpp --- sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp index 5b0582286d31a..979ff34490ef9 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp @@ -82,7 +82,7 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory void __init(decoratedPtr ptr) { this->ptr = ptr; } #endif private: - decoratedPtr data_ptr; + decoratedPtr ptr; }; } // namespace ext::oneapi::experimental } // namespace _V1 From a0b70e2ac6c5ab6530562ecaa93f8bba149802b5 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Fri, 23 Aug 2024 07:19:54 -0700 Subject: [PATCH 026/107] Formatting --- sycl/include/sycl/handler.hpp | 3 ++- sycl/source/handler.cpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 990c585f78fba..809eed806b5a1 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -699,7 +699,8 @@ class __SYCL_EXPORT handler { &Arg) { addArg(detail::kernel_param_kind_t::kind_work_group_memory, &Arg, detail::getWorkGroupMemoryOwnSize( - static_cast *>(Arg)), + static_cast *>(Arg)), ArgIndex); } diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 0e8d927050690..f2fae01d418f8 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -792,7 +792,7 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, } case kernel_param_kind_t::kind_work_group_memory: { addArg(kernel_param_kind_t::kind_std_layout, nullptr, - static_cast(Ptr)->buffer_size, + static_cast(Ptr)->buffer_size, Index + IndexShift); break; } From ed8f125cfd502e9b3609374ba15ec4bcbd2ffea2 Mon Sep 17 00:00:00 2001 From: "Bushi, 
Lorenc" Date: Fri, 23 Aug 2024 17:10:56 +0200 Subject: [PATCH 027/107] Move doc to experimental folder --- .../sycl_ext_oneapi_work_group_memory.asciidoc | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename sycl/doc/extensions/{supported => experimental}/sycl_ext_oneapi_work_group_memory.asciidoc (100%) diff --git a/sycl/doc/extensions/supported/sycl_ext_oneapi_work_group_memory.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_work_group_memory.asciidoc similarity index 100% rename from sycl/doc/extensions/supported/sycl_ext_oneapi_work_group_memory.asciidoc rename to sycl/doc/extensions/experimental/sycl_ext_oneapi_work_group_memory.asciidoc From 94608763e9ae0445ed268dee54f7184ea93cbbe1 Mon Sep 17 00:00:00 2001 From: "Bushi, Lorenc" Date: Fri, 23 Aug 2024 17:11:18 +0200 Subject: [PATCH 028/107] Update status section in doc --- .../sycl_ext_oneapi_work_group_memory.asciidoc | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_work_group_memory.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_work_group_memory.asciidoc index 9a7875c6987ab..a8857dd213168 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_work_group_memory.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_work_group_memory.asciidoc @@ -49,12 +49,10 @@ This extension also depends on the following other SYCL extensions: == Status -This is a proposed extension specification, intended to gather community -feedback. -Interfaces defined in this specification may not be implemented yet or may be -in a preliminary state. -The specification itself may also change in incompatible ways before it is -finalized. +This is an experimental extension specification, intended to provide early +access to features and gather community feedback. 
Interfaces defined in this +specification are implemented in {dpcpp}, but they are not finalized and may +change incompatibly in future versions of {dpcpp} without prior notice. *Shipping software products should not rely on APIs defined in this specification.* From e2889b367638c465fd136b30c003b972df651c41 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Fri, 23 Aug 2024 08:17:52 -0700 Subject: [PATCH 029/107] Final fixes --- sycl/include/sycl/handler.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 809eed806b5a1..f1ec3d9178953 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -699,8 +699,7 @@ class __SYCL_EXPORT handler { &Arg) { addArg(detail::kernel_param_kind_t::kind_work_group_memory, &Arg, detail::getWorkGroupMemoryOwnSize( - static_cast *>(Arg)), + static_cast(&Arg)), ArgIndex); } From d6c78b929fe45349727b2d1542fcefb217dcfcb4 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi <113361374+lbushi25@users.noreply.github.com> Date: Fri, 23 Aug 2024 12:23:19 -0400 Subject: [PATCH 030/107] Remove unnecessary include --- sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp index 979ff34490ef9..fccb58ed25137 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp @@ -8,7 +8,6 @@ #pragma once -#include #include namespace sycl { From 8f7a07b420100e0af46e3c8a443661b251a0b325 Mon Sep 17 00:00:00 2001 From: "Bushi, Lorenc" Date: Wed, 28 Aug 2024 23:27:06 +0200 Subject: [PATCH 031/107] Add initial tests for work_group_memory extension --- .../oneapi/experimental/work_group_memory.hpp | 21 ++- .../swap_test_using_work_group_memory.cpp | 80 +++++++++ 
.../work_group_memory_sanity_test.cpp | 170 ++++++++++++++++++ 3 files changed, 270 insertions(+), 1 deletion(-) create mode 100644 sycl/test-e2e/WorkGroupMemory/swap_test_using_work_group_memory.cpp create mode 100644 sycl/test-e2e/WorkGroupMemory/work_group_memory_sanity_test.cpp diff --git a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp index fccb58ed25137..61afd288a4c4c 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp @@ -35,6 +35,25 @@ class work_group_memory_impl { inline size_t getWorkGroupMemoryOwnSize(detail::work_group_memory_impl *wgm) { return wgm->wgm_size; } + +// The following 3 functions help us get the address of the first element of a multi-dimensional +// array, be it bounded or unbounded. A scalar is also included. In that case, it just returns +// the address of the scalar. +template +auto getData(DataT& scalar) { + return &scalar; +} + +template +auto getData(DataT (&bounded_arr)[N]) { + return getData(bounded_arr[0]); +} + +template +auto getData(DataT (&unbounded_arr)[]) { + return getData(unbounded_arr[0]); +} + } // namespace detail namespace ext::oneapi::experimental { @@ -67,7 +86,7 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory multi_ptr get_multi_ptr() const { return sycl::address_space_cast(ptr); + IsDecorated, value_type>(sycl::detail::getData(*ptr)); } DataT *operator&() const { return ptr; } operator DataT &() const { return *(this->operator&()); } diff --git a/sycl/test-e2e/WorkGroupMemory/swap_test_using_work_group_memory.cpp b/sycl/test-e2e/WorkGroupMemory/swap_test_using_work_group_memory.cpp new file mode 100644 index 0000000000000..903553cdb64a4 --- /dev/null +++ b/sycl/test-e2e/WorkGroupMemory/swap_test_using_work_group_memory.cpp @@ -0,0 +1,80 @@ +// RUN: %{build} -o %{t.out} +// RUN: %{run} %{t.out} + +#include 
+#include +#include +#include + +namespace syclexp = sycl::ext::oneapi::experimental; + +// This test performs a swap of two scalars/arrays inside a kernel using a work_group_memory object as a temporary buffer. +// The test is done for scalars types, bounded and unbounded arrays. After the kernel finishes, it is verified on the host side +// that the swap worked. + +template< typename T> +void swap_scalar(T& a, T& b) { + sycl::queue q; + const T old_a = a; + const T old_b = b; + { + sycl::buffer buf_a{ &a, 1}; + sycl::buffer buf_b{ &b, 1}; + q.submit([&](sycl::handler &cgh) { + sycl::accessor acc_a{ buf_a, cgh }; + sycl::accessor acc_b { buf_b, cgh }; + syclexp::work_group_memory temp{ cgh }; + cgh.single_task([=]() { + temp = acc_a[0]; + acc_a[0] = acc_b[0]; + acc_b[0] = temp; + });}); + } + assert(a == old_b && b == old_a && "Swap assertion failed"); +} + +template +void swap_bounded_array_1d(T (&a)[N], T (&b)[N]) { +sycl::queue q; + T old_a[N]; + std::memcpy(old_a, a, sizeof(a)); + T old_b[N]; + std::memcpy(old_b, b, sizeof(b)); + { + sycl::buffer buf_a{ a, N}; + sycl::buffer buf_b{ b, N}; + q.submit([&](sycl::handler &cgh) { + sycl::accessor acc_a{ buf_a, cgh }; + sycl::accessor acc_b { buf_b, cgh }; + syclexp::work_group_memory temp{ cgh }; + cgh.single_task([=]() { +for (int i= 0; i < N; ++i) { + temp[i] = acc_a[i]; + acc_a[i] = acc_b[i]; + acc_b[i] = temp[i]; +} + });}); + } +for (int i = 0; i < N; ++i) { + assert(a[i] == old_b[i] && b[i] == old_a[i] && "Swap assertion failed"); +} + +} +int main() { + int a = 25; + int b = 42; + int arr1[5] = {0, 1, 2, 3, 4}; + int arr2[5] = {5, 6, 7, 8, 9}; + swap_scalar(a, b); + swap_bounded_array_1d(arr1, arr2); +return 0; +} + + + + + + + + + diff --git a/sycl/test-e2e/WorkGroupMemory/work_group_memory_sanity_test.cpp b/sycl/test-e2e/WorkGroupMemory/work_group_memory_sanity_test.cpp new file mode 100644 index 0000000000000..b7905c573dae4 --- /dev/null +++ 
b/sycl/test-e2e/WorkGroupMemory/work_group_memory_sanity_test.cpp @@ -0,0 +1,170 @@ +// RUN: %{build} -o %{t.out} +// RUN: %{run} %{t.out} + +#include +#include +#include +#include +#include +#include + +// Sanity test that checks to see if idiomatic code involving work_group_memory objects compiles and runs with no errors. + +namespace syclex = sycl::ext::oneapi::experimental; +sycl::queue global_q; + +constexpr size_t SIZE = 4096; +constexpr size_t WGSIZE = 256; + +struct point { + int x; + int y; +}; + +void simple_inc(const syclex::work_group_memory& mem) { + mem++; +} + +void fancy_inc(syclex::work_group_memory mem) { + syclex::work_group_memory t = mem; + t = mem; + t++; +} + +void test_breadth() { + sycl::queue q; + global_q = q; + + int *res = sycl::malloc_host(16, q); + + q.submit([&](sycl::handler &cgh) { + syclex::work_group_memory mem1{cgh}; + syclex::work_group_memory mem2{cgh}; + syclex::work_group_memory mem3{cgh}; + syclex::work_group_memory mem4{5, cgh}; + syclex::work_group_memory mem5{2, cgh}; + syclex::work_group_memory mem6{2, cgh}; + syclex::work_group_memory mem7{cgh}; + syclex::work_group_memory mem8{2, cgh}; + + cgh.single_task([=] { + // Operations on scalar + ++mem1; + mem1++; + mem1 += 1; + mem1 = mem1 + 1; + int *p1 = &mem1; (*p1)++; + simple_inc(mem1); + fancy_inc(mem1); + res[0] = *(mem1.get_multi_ptr()); + res[1] = mem1; + + // Operations on bounded array + mem2[4] = mem2[4] + 1; + int (*p2)[10] = &mem2; (*p2)[4]++; + res[2] = mem2.get_multi_ptr()[4]; + res[3] = mem2[4]; + + mem3[4] = mem3[4] + 1; + int (*p3)[10] = &mem3; (*p3)[4]++; + res[4] = mem3.get_multi_ptr()[4]; + res[5] = mem3[4]; + + // Operations on unbounded array + mem4[4] = mem4[4] + 1; + int (*p4)[] = &mem4; (*p4)[4]++; + res[6] = mem4.get_multi_ptr()[4]; + res[7] = mem4[4]; + + // Operations on unbounded multi-dimensional array + mem5[1][5] = mem5[1][5] + 1; + mem5[1][7] = mem5[1][7] + 1; + res[8] = mem5.get_multi_ptr()[10 + 5]; + res[9] = mem5[1][7]; + + mem6[1][5] = 
mem6[1][5] + 1; + mem6[1][7] = mem6[1][7] + 1; + res[10] = mem6.get_multi_ptr()[10 + 5]; + res[11] = mem6[1][7]; + + // Operations on scalar struct + (&mem7)->x++; + (&mem7)->y += 1; + point pnt = mem7; + pnt.x++; + pnt.y++; + mem7 = pnt; + res[12] = (&mem7)->x; + res[13] = (&mem7)->y; + + // Operations on unbounded multi-dimensional array of struct + mem8[1][5].x++; + mem8[1][5].y += 1; + res[14] = mem8.get_multi_ptr()[10 + 5].x; + res[15] = mem8[1][5].y; + }); + }).wait(); +} + +void test_basic() { + sycl::queue q; + + q.submit([&](sycl::handler &cgh) { + // Allocate one element for each work-item in the work-group. + syclex::work_group_memory mem{cgh}; + + sycl::nd_range ndr{{SIZE}, {WGSIZE}}; + cgh.parallel_for(ndr, [=](sycl::nd_item<> it) { + size_t id = it.get_local_linear_id(); + + // Each work-item has its own dedicated element of the array. + mem[id] = 0; + }); + }).wait(); +} + +void test_operations() { + sycl::queue q; + + q.submit([&](sycl::handler &cgh) { + syclex::work_group_memory mem1{cgh}; // scalar + syclex::work_group_memory mem2{cgh}; // bounded array + syclex::work_group_memory mem3{5, cgh}; // unbounded array + syclex::work_group_memory mem4{2, cgh}; // multi-dimensional array + syclex::work_group_memory mem5{cgh}; // array of struct + + sycl::nd_range ndr{{SIZE}, {WGSIZE}}; + cgh.parallel_for(ndr, [=](sycl::nd_item<> it) { + if (it.get_group().leader()) { + // A "work_group_memory" templated on a scalar type acts much like the + // enclosed scalar type. + ++mem1; + mem1++; + mem1 += 1; + mem1 = mem1 + 1; + int *p1 = &mem1; + + // A "work_group_memory" templated on an array type (either bounded or + // unbounded) acts like an array. + ++mem2[4]; + mem2[4]++; + mem2[4] = mem2[4] + 1; + int *p2 = &mem2[4]; + + // A multi-dimensional array works as expected. + mem4[1][5] = mem4[1][5] + 1; + mem4[1][7] = mem4[1][7] + 1; + + // An array of structs works as expected too. 
+ mem5[1].x++; + mem5[1].y = mem5[1].y + 1; + } + }); + }).wait(); +} + +int main() { + test_breadth(); + test_basic(); + test_operations(); +} From ae598996dcc9288d3f4058355a03812cdcee0a96 Mon Sep 17 00:00:00 2001 From: "Bushi, Lorenc" Date: Thu, 29 Aug 2024 07:37:06 +0200 Subject: [PATCH 032/107] Add E2E tests for work group memory --- .../oneapi/experimental/work_group_memory.hpp | 2 +- .../swap_test_using_work_group_memory.cpp | 265 ++++++++++++++++-- .../work_group_memory_sanity_test.cpp | 7 +- 3 files changed, 246 insertions(+), 28 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp index 61afd288a4c4c..59c5c6f92ab85 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp @@ -22,7 +22,7 @@ inline constexpr bool is_unbounded_array_v = is_unbounded_array::value; class work_group_memory_impl { public: - work_group_memory_impl() = default; + work_group_memory_impl() : wgm_size{ 0 }, buffer_size{ 0 } {} work_group_memory_impl(const work_group_memory_impl &rhs) = default; work_group_memory_impl & operator=(const work_group_memory_impl &rhs) = default; diff --git a/sycl/test-e2e/WorkGroupMemory/swap_test_using_work_group_memory.cpp b/sycl/test-e2e/WorkGroupMemory/swap_test_using_work_group_memory.cpp index 903553cdb64a4..dfb7bdae2fc50 100644 --- a/sycl/test-e2e/WorkGroupMemory/swap_test_using_work_group_memory.cpp +++ b/sycl/test-e2e/WorkGroupMemory/swap_test_using_work_group_memory.cpp @@ -1,5 +1,5 @@ -// RUN: %{build} -o %{t.out} -// RUN: %{run} %{t.out} +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out #include #include @@ -9,8 +9,14 @@ namespace syclexp = sycl::ext::oneapi::experimental; // This test performs a swap of two scalars/arrays inside a kernel using a work_group_memory object as a temporary buffer. 
-// The test is done for scalars types, bounded and unbounded arrays. After the kernel finishes, it is verified on the host side +// The test is done for scalar types and bounded arrays. After the kernel finishes, it is verified on the host side // that the swap worked. + +// One important note is that for unbounded arrays, the feature is unstable. Specifically, the code may or may not compile +// when kernels reference work group memory objects that have been constructed with the unbounded array type. This is due to a +// limitation of SPIRV where it does not allow arrays of length zero. For example, an unbounded array may be translated to an array of length zero in +// LLVM IR and during the LLVM IR -> SPIRV translation phase, the translator rejects all arrays of length zero because they are invalid constructs in SPIRV. +// As a result of this, unbounded arrays do not appear in this test. They do appear in the sanity test though in this directory because there the unbounded arrays are used with concrete subscript indices which seems to work, for now at least. template< typename T> void swap_scalar(T& a, T& b) { @@ -30,11 +36,69 @@ void swap_scalar(T& a, T& b) { acc_b[0] = temp; });}); } - assert(a == old_b && b == old_a && "Swap assertion failed"); + assert(a == old_b && b == old_a && "Incorrect swap!"); + // swap again but this time using two temporaries. The first temporary will be used to save the value of a + // and the second temporay will be default-constructed and then copy-assigned from the first temporary + // to be then used to write that value to b. 
+ { + sycl::buffer buf_a{ &a, 1}; + sycl::buffer buf_b{ &b, 1}; + q.submit([&](sycl::handler &cgh) { + sycl::accessor acc_a{ buf_a, cgh }; + sycl::accessor acc_b { buf_b, cgh }; + syclexp::work_group_memory temp{ cgh }; + syclexp::work_group_memory temp2; + cgh.single_task([=]() { + temp2 = temp; // temp and temp2 have the same underlying data + temp = acc_a[0]; + acc_a[0] = acc_b[0]; + acc_b[0] = temp2; // safe to use temp2 + });}); + } + // Two swaps same as no swaps + assert(a == old_a && b == old_b && "Incorrect swap!"); + // Initialize a second temporary and instead of assigning the first temporary to it, assign only the value of the data + // of the first temporary so that unlike above, the two temporaries will not be aliasing the same memory location but they + // will have equal values. + { + sycl::buffer buf_a{ &a, 1}; + sycl::buffer buf_b{ &b, 1}; + q.submit([&](sycl::handler &cgh) { + sycl::accessor acc_a{ buf_a, cgh }; + sycl::accessor acc_b { buf_b, cgh }; + syclexp::work_group_memory temp{ cgh }; + syclexp::work_group_memory temp2{ cgh }; + cgh.single_task([=]() { + temp = acc_a[0]; + acc_a[0] = acc_b[0]; + temp2 = *(temp.get_multi_ptr()); // temp2 now has the same value as temp but not the same memory location + acc_b[0] = temp2; + });}); + } + // Three swaps same as one swap + assert(a == old_b && b == old_a && "Incorrect swap!"); + // Same as above but instead of using multi_ptr, use address-of operator. 
+ { + sycl::buffer buf_a{ &a, 1}; + sycl::buffer buf_b{ &b, 1}; + q.submit([&](sycl::handler &cgh) { + sycl::accessor acc_a{ buf_a, cgh }; + sycl::accessor acc_b { buf_b, cgh }; + syclexp::work_group_memory temp{ cgh }; +syclexp::work_group_memory temp2{ cgh }; + cgh.single_task([=]() { + temp = acc_a[0]; + acc_a[0] = acc_b[0]; + temp2 = *(&temp); + acc_b[0] = temp2; + });}); + } + // Four swaps same as no swap + assert(a == old_a && b == old_b && "Incorrect swap!"); } template -void swap_bounded_array_1d(T (&a)[N], T (&b)[N]) { +void swap_array_1d(T (&a)[N], T (&b)[N]) { sycl::queue q; T old_a[N]; std::memcpy(old_a, a, sizeof(a)); @@ -56,25 +120,182 @@ for (int i= 0; i < N; ++i) { });}); } for (int i = 0; i < N; ++i) { - assert(a[i] == old_b[i] && b[i] == old_a[i] && "Swap assertion failed"); + assert(a[i] == old_b[i] && b[i] == old_a[i] && "Incorrect swap!"); +} + +// Instead of working with the temporary work group memory object, we retrieve its corresponding +// multi-pointer and work with it instead. +{ + sycl::buffer buf_a{ a, N}; + sycl::buffer buf_b{ b, N}; + q.submit([&](sycl::handler &cgh) { + sycl::accessor acc_a{ buf_a, cgh }; + sycl::accessor acc_b { buf_b, cgh }; + syclexp::work_group_memory temp{ cgh }; + cgh.single_task([=]() { + auto ptr = temp.get_multi_ptr(); +for (int i= 0; i < N; ++i) { + ptr[i] = acc_a[i]; + acc_a[i] = acc_b[i]; + acc_b[i] = ptr[i]; +} + });}); + } +// Two swaps same as ono swap +for (int i = 0; i < N; ++i) { +assert(a[i] == old_a[i] && b[i] == old_b[i] && "Incorrect swap!"); +} + +// Same as above but use a pointer returned by the address-of operator instead. 
+{ + sycl::buffer buf_a{ a, N}; + sycl::buffer buf_b{ b, N}; + q.submit([&](sycl::handler &cgh) { + sycl::accessor acc_a{ buf_a, cgh }; + sycl::accessor acc_b { buf_b, cgh }; + syclexp::work_group_memory temp{ cgh }; + cgh.single_task([=]() { +auto ptr = &temp; +for (int i= 0; i < N; ++i) { + (*ptr)[i] = acc_a[i]; + acc_a[i] = acc_b[i]; + acc_b[i] = (*ptr)[i]; +} + });}); + } +// Three swaps same as one swap +for (int i = 0; i < N; ++i) { + assert(a[i] == old_b[i] && b[i] == old_a[i] && "Incorrect swap!"); + } - } + + +template +void swap_array_2d(T (&a)[N][M], T (&b)[N][M]) { +sycl::queue q; + T old_a[N][M]; +for (int i = 0; i < N; ++i) { + std::memcpy(old_a[i], a[i], sizeof(a[0])); +} + T old_b[N][M]; +for (int i = 0; i < N; ++i) { + + std::memcpy(old_b[i], b[i], sizeof(b[0])); +} + { + sycl::buffer buf_a{ a[0], sycl::range{N, M}}; + sycl::buffer buf_b{ b[0], sycl::range{N, M}}; + q.submit([&](sycl::handler &cgh) { + sycl::accessor acc_a{ buf_a, cgh }; + sycl::accessor acc_b { buf_b, cgh }; + syclexp::work_group_memory temp{ cgh }; + cgh.single_task([=]() { +for (int i= 0; i < N; ++i) { +for (int j = 0; j < M; ++j) { + temp[i][j]= acc_a[i][j]; + acc_a[i][j] = acc_b[i][j]; + acc_b[i][j] = temp[i][j]; +} +} + });}); + } +for (int i = 0; i < N; ++i) { +for (int j = 0; j < M; ++j) { + assert(a[i][j] == old_b[i][j] && b[i][j] == old_a[i][j] && "Incorrect swap!"); +} +} + +// Perform the swap but this time use two temporary work group memory objects. +// One will save the value of acc_a and the other will be copy-assigned from it +// and will be used to write the values back to acc_b. 
+{ +sycl::buffer buf_a{ a[0], sycl::range{N, M}}; + sycl::buffer buf_b{ b[0], sycl::range{N, M}}; + q.submit([&](sycl::handler &cgh) { + sycl::accessor acc_a{ buf_a, cgh }; + sycl::accessor acc_b { buf_b, cgh }; + syclexp::work_group_memory temp{ cgh }; + syclexp::work_group_memory temp2{ cgh }; + cgh.single_task([=]() { +for (int i= 0; i < N; ++i) { +for (int j = 0; j < M; ++j) { + temp[i][j]= acc_a[i][j]; + acc_a[i][j] = acc_b[i][j]; +} +} +syclexp::work_group_memory temp2; +temp2 = temp; +for (int i = 0; i < N; ++i) { +for (int j = 0; j < M; ++j) { + acc_b[i][j] = temp2[i][j]; +} +} + });}); + } +for (int i = 0; i < N; ++i) { +for (int j = 0; j < M; ++j) { +// Two swaps are the same as no swap + assert(a[i][j] == old_a[i][j] && b[i][j] == old_b[i][j] && "Incorrect swap!"); +} +} + +// Same as above but construct the second temporary inside the kernel and copy-construct it from the first temporary. +{ +sycl::buffer buf_a{ a[0], sycl::range{N, M}}; + sycl::buffer buf_b{ b[0], sycl::range{N, M}}; + q.submit([&](sycl::handler &cgh) { + sycl::accessor acc_a{ buf_a, cgh }; + sycl::accessor acc_b { buf_b, cgh }; + syclexp::work_group_memory temp{ cgh }; + syclexp::work_group_memory temp2{ cgh }; + cgh.single_task([=]() { +for (int i= 0; i < N; ++i) { +for (int j = 0; j < M; ++j) { + temp[i][j]= acc_a[i][j]; + acc_a[i][j] = acc_b[i][j]; +} +} +syclexp::work_group_memory temp2{ temp }; +for (int i = 0; i < N; ++i) { +for (int j = 0; j < M; ++j) { + acc_b[i][j] = temp2[i][j]; +} +} + });}); + } +for (int i = 0; i < N; ++i) { +for (int j = 0; j < M; ++j) { +// Three swaps are the same as one swap + assert(a[i][j] == old_b[i][j] && b[i][j] == old_a[i][j] && "Incorrect swap!"); +} +} + +} +constexpr size_t N = 100; +constexpr size_t M = 100; int main() { - int a = 25; - int b = 42; - int arr1[5] = {0, 1, 2, 3, 4}; - int arr2[5] = {5, 6, 7, 8, 9}; - swap_scalar(a, b); - swap_bounded_array_1d(arr1, arr2); + int intarr1[N][M]; + int intarr2[N][M]; + float floatarr1[N][M]; + 
float floatarr2[N][M]; + for (int i = 0; i < N; ++i) { + for (int j = 0; j < M; ++j) { + intarr1[i][j] = i + j; + intarr2[i][j] = i * j; + floatarr1[i][j] = (i + 1) / (j + 1); + floatarr2[i][j] = (j + 1) / (i + 1); +} +} +for (int i = 0; i < N; ++i) { + for (int j = 0; j < M; ++j) { + swap_scalar(intarr1[i][j], intarr2[i][j]); + swap_scalar(floatarr1[i][j], floatarr2[i][j]); +} +swap_array_1d(intarr1[i], intarr2[i]); +swap_array_1d(floatarr1[i], floatarr2[i]); +} +swap_array_2d(intarr1, intarr2); +swap_array_2d(floatarr1, floatarr2); return 0; } - - - - - - - - - diff --git a/sycl/test-e2e/WorkGroupMemory/work_group_memory_sanity_test.cpp b/sycl/test-e2e/WorkGroupMemory/work_group_memory_sanity_test.cpp index b7905c573dae4..f1d37d00272b4 100644 --- a/sycl/test-e2e/WorkGroupMemory/work_group_memory_sanity_test.cpp +++ b/sycl/test-e2e/WorkGroupMemory/work_group_memory_sanity_test.cpp @@ -1,9 +1,6 @@ -// RUN: %{build} -o %{t.out} -// RUN: %{run} %{t.out} +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out -#include -#include -#include #include #include #include From 8cff6033eac64a95b2e15071508d95dd5be2fcd9 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Wed, 28 Aug 2024 22:43:03 -0700 Subject: [PATCH 033/107] Fix formatting --- .../oneapi/experimental/work_group_memory.hpp | 26 +- .../swap_test_using_work_group_memory.cpp | 566 +++++++++--------- .../work_group_memory_sanity_test.cpp | 230 +++---- 3 files changed, 423 insertions(+), 399 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp index 59c5c6f92ab85..5e10bece6a317 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp @@ -22,7 +22,7 @@ inline constexpr bool is_unbounded_array_v = is_unbounded_array::value; class work_group_memory_impl { public: - work_group_memory_impl() : wgm_size{ 0 }, buffer_size{ 0 } {} + 
work_group_memory_impl() : wgm_size{0}, buffer_size{0} {} work_group_memory_impl(const work_group_memory_impl &rhs) = default; work_group_memory_impl & operator=(const work_group_memory_impl &rhs) = default; @@ -36,22 +36,17 @@ inline size_t getWorkGroupMemoryOwnSize(detail::work_group_memory_impl *wgm) { return wgm->wgm_size; } -// The following 3 functions help us get the address of the first element of a multi-dimensional -// array, be it bounded or unbounded. A scalar is also included. In that case, it just returns -// the address of the scalar. -template -auto getData(DataT& scalar) { - return &scalar; -} +// The following 3 functions help us get the address of the first element of a +// multi-dimensional array, be it bounded or unbounded. A scalar is also +// included. In that case, it just returns the address of the scalar. +template auto getData(DataT &scalar) { return &scalar; } -template -auto getData(DataT (&bounded_arr)[N]) { - return getData(bounded_arr[0]); +template auto getData(DataT (&bounded_arr)[N]) { + return getData(bounded_arr[0]); } -template -auto getData(DataT (&unbounded_arr)[]) { - return getData(unbounded_arr[0]); +template auto getData(DataT (&unbounded_arr)[]) { + return getData(unbounded_arr[0]); } } // namespace detail @@ -86,7 +81,8 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory multi_ptr get_multi_ptr() const { return sycl::address_space_cast(sycl::detail::getData(*ptr)); + IsDecorated, value_type>( + sycl::detail::getData(*ptr)); } DataT *operator&() const { return ptr; } operator DataT &() const { return *(this->operator&()); } diff --git a/sycl/test-e2e/WorkGroupMemory/swap_test_using_work_group_memory.cpp b/sycl/test-e2e/WorkGroupMemory/swap_test_using_work_group_memory.cpp index dfb7bdae2fc50..00b284b7017ee 100644 --- a/sycl/test-e2e/WorkGroupMemory/swap_test_using_work_group_memory.cpp +++ b/sycl/test-e2e/WorkGroupMemory/swap_test_using_work_group_memory.cpp @@ -1,301 +1,325 @@ // RUN: %{build} 
-o %t.out -// RUN: %{run} %t.out +// RUN: %{run} %t.out -#include -#include #include #include +#include +#include namespace syclexp = sycl::ext::oneapi::experimental; -// This test performs a swap of two scalars/arrays inside a kernel using a work_group_memory object as a temporary buffer. -// The test is done for scalar types and bounded arrays. After the kernel finishes, it is verified on the host side -// that the swap worked. - -// One important note is that for unbounded arrays, the feature is unstable. Specifically, the code may or may not compile -// when kernels reference work group memory objects that have been constructed with the unbounded array type. This is due to a -// limitation of SPIRV where it does not allow arrays of length zero. For example, an unbounded array may be translated to an array of length zero in -// LLVM IR and during the LLVM IR -> SPIRV translation phase, the translator rejects all arrays of length zero because they are invalid constructs in SPIRV. -// As a result of this, unbounded arrays do not appear in this test. They do appear in the sanity test though in this directory because there the unbounded arrays are used with concrete subscript indices which seems to work, for now at least. +// This test performs a swap of two scalars/arrays inside a kernel using a +// work_group_memory object as a temporary buffer. The test is done for scalar +// types and bounded arrays. After the kernel finishes, it is verified on the +// host side that the swap worked. 
-template< typename T> -void swap_scalar(T& a, T& b) { - sycl::queue q; - const T old_a = a; - const T old_b = b; - { - sycl::buffer buf_a{ &a, 1}; - sycl::buffer buf_b{ &b, 1}; - q.submit([&](sycl::handler &cgh) { - sycl::accessor acc_a{ buf_a, cgh }; - sycl::accessor acc_b { buf_b, cgh }; - syclexp::work_group_memory temp{ cgh }; - cgh.single_task([=]() { - temp = acc_a[0]; - acc_a[0] = acc_b[0]; - acc_b[0] = temp; - });}); - } - assert(a == old_b && b == old_a && "Incorrect swap!"); - // swap again but this time using two temporaries. The first temporary will be used to save the value of a - // and the second temporay will be default-constructed and then copy-assigned from the first temporary - // to be then used to write that value to b. - { - sycl::buffer buf_a{ &a, 1}; - sycl::buffer buf_b{ &b, 1}; - q.submit([&](sycl::handler &cgh) { - sycl::accessor acc_a{ buf_a, cgh }; - sycl::accessor acc_b { buf_b, cgh }; - syclexp::work_group_memory temp{ cgh }; - syclexp::work_group_memory temp2; - cgh.single_task([=]() { - temp2 = temp; // temp and temp2 have the same underlying data - temp = acc_a[0]; - acc_a[0] = acc_b[0]; - acc_b[0] = temp2; // safe to use temp2 - });}); - } - // Two swaps same as no swaps - assert(a == old_a && b == old_b && "Incorrect swap!"); - // Initialize a second temporary and instead of assigning the first temporary to it, assign only the value of the data - // of the first temporary so that unlike above, the two temporaries will not be aliasing the same memory location but they - // will have equal values. 
- { - sycl::buffer buf_a{ &a, 1}; - sycl::buffer buf_b{ &b, 1}; - q.submit([&](sycl::handler &cgh) { - sycl::accessor acc_a{ buf_a, cgh }; - sycl::accessor acc_b { buf_b, cgh }; - syclexp::work_group_memory temp{ cgh }; - syclexp::work_group_memory temp2{ cgh }; - cgh.single_task([=]() { - temp = acc_a[0]; - acc_a[0] = acc_b[0]; - temp2 = *(temp.get_multi_ptr()); // temp2 now has the same value as temp but not the same memory location - acc_b[0] = temp2; - });}); - } - // Three swaps same as one swap - assert(a == old_b && b == old_a && "Incorrect swap!"); - // Same as above but instead of using multi_ptr, use address-of operator. - { - sycl::buffer buf_a{ &a, 1}; - sycl::buffer buf_b{ &b, 1}; - q.submit([&](sycl::handler &cgh) { - sycl::accessor acc_a{ buf_a, cgh }; - sycl::accessor acc_b { buf_b, cgh }; - syclexp::work_group_memory temp{ cgh }; -syclexp::work_group_memory temp2{ cgh }; - cgh.single_task([=]() { - temp = acc_a[0]; - acc_a[0] = acc_b[0]; - temp2 = *(&temp); - acc_b[0] = temp2; - });}); - } - // Four swaps same as no swap - assert(a == old_a && b == old_b && "Incorrect swap!"); -} +// One important note is that for unbounded arrays, the feature is unstable. +// Specifically, the code may or may not compile when kernels reference work +// group memory objects that have been constructed with the unbounded array +// type. This is due to a limitation of SPIRV where it does not allow arrays of +// length zero. For example, an unbounded array may be translated to an array of +// length zero in LLVM IR and during the LLVM IR -> SPIRV translation phase, the +// translator rejects all arrays of length zero because they are invalid +// constructs in SPIRV. As a result of this, unbounded arrays do not appear in +// this test. They do appear in the sanity test though in this directory because +// there the unbounded arrays are used with concrete subscript indices which +// seems to work, for now at least. 
-template -void swap_array_1d(T (&a)[N], T (&b)[N]) { -sycl::queue q; - T old_a[N]; - std::memcpy(old_a, a, sizeof(a)); - T old_b[N]; - std::memcpy(old_b, b, sizeof(b)); - { - sycl::buffer buf_a{ a, N}; - sycl::buffer buf_b{ b, N}; - q.submit([&](sycl::handler &cgh) { - sycl::accessor acc_a{ buf_a, cgh }; - sycl::accessor acc_b { buf_b, cgh }; - syclexp::work_group_memory temp{ cgh }; - cgh.single_task([=]() { -for (int i= 0; i < N; ++i) { - temp[i] = acc_a[i]; - acc_a[i] = acc_b[i]; - acc_b[i] = temp[i]; -} - });}); - } -for (int i = 0; i < N; ++i) { - assert(a[i] == old_b[i] && b[i] == old_a[i] && "Incorrect swap!"); -} +template void swap_scalar(T &a, T &b) { + sycl::queue q; + const T old_a = a; + const T old_b = b; + { + sycl::buffer buf_a{&a, 1}; + sycl::buffer buf_b{&b, 1}; + q.submit([&](sycl::handler &cgh) { + sycl::accessor acc_a{buf_a, cgh}; + sycl::accessor acc_b{buf_b, cgh}; + syclexp::work_group_memory temp{cgh}; + cgh.single_task([=]() { + temp = acc_a[0]; + acc_a[0] = acc_b[0]; + acc_b[0] = temp; + }); + }); + } + assert(a == old_b && b == old_a && "Incorrect swap!"); -// Instead of working with the temporary work group memory object, we retrieve its corresponding -// multi-pointer and work with it instead. -{ - sycl::buffer buf_a{ a, N}; - sycl::buffer buf_b{ b, N}; - q.submit([&](sycl::handler &cgh) { - sycl::accessor acc_a{ buf_a, cgh }; - sycl::accessor acc_b { buf_b, cgh }; - syclexp::work_group_memory temp{ cgh }; - cgh.single_task([=]() { - auto ptr = temp.get_multi_ptr(); -for (int i= 0; i < N; ++i) { - ptr[i] = acc_a[i]; - acc_a[i] = acc_b[i]; - acc_b[i] = ptr[i]; + // swap again but this time using two temporaries. The first temporary will be + // used to save the value of a and the second temporay will be + // default-constructed and then copy-assigned from the first temporary to be + // then used to write that value to b. 
+ { + sycl::buffer buf_a{&a, 1}; + sycl::buffer buf_b{&b, 1}; + q.submit([&](sycl::handler &cgh) { + sycl::accessor acc_a{buf_a, cgh}; + sycl::accessor acc_b{buf_b, cgh}; + syclexp::work_group_memory temp{cgh}; + syclexp::work_group_memory temp2; + cgh.single_task([=]() { + temp2 = temp; // temp and temp2 have the same underlying data + temp = acc_a[0]; + acc_a[0] = acc_b[0]; + acc_b[0] = temp2; // safe to use temp2 + }); + }); + } + // Two swaps same as no swaps + assert(a == old_a && b == old_b && "Incorrect swap!"); + + // Initialize a second temporary and instead of assigning the first temporary + // to it, assign only the value of the data of the first temporary so that + // unlike above, the two temporaries will not be aliasing the same memory + // location but they will have equal values. + { + sycl::buffer buf_a{&a, 1}; + sycl::buffer buf_b{&b, 1}; + q.submit([&](sycl::handler &cgh) { + sycl::accessor acc_a{buf_a, cgh}; + sycl::accessor acc_b{buf_b, cgh}; + syclexp::work_group_memory temp{cgh}; + syclexp::work_group_memory temp2{cgh}; + cgh.single_task([=]() { + temp = acc_a[0]; + acc_a[0] = acc_b[0]; + temp2 = *(temp.get_multi_ptr()); // temp2 now has the same value as temp + // but not the same memory location + acc_b[0] = temp2; + }); + }); + } + // Three swaps same as one swap + assert(a == old_b && b == old_a && "Incorrect swap!"); + + // Same as above but instead of using multi_ptr, use address-of operator. 
+ { + sycl::buffer buf_a{&a, 1}; + sycl::buffer buf_b{&b, 1}; + q.submit([&](sycl::handler &cgh) { + sycl::accessor acc_a{buf_a, cgh}; + sycl::accessor acc_b{buf_b, cgh}; + syclexp::work_group_memory temp{cgh}; + syclexp::work_group_memory temp2{cgh}; + cgh.single_task([=]() { + temp = acc_a[0]; + acc_a[0] = acc_b[0]; + temp2 = *(&temp); + acc_b[0] = temp2; + }); + }); + } + // Four swaps same as no swap + assert(a == old_a && b == old_b && "Incorrect swap!"); } - });}); + +template void swap_array_1d(T (&a)[N], T (&b)[N]) { + sycl::queue q; + T old_a[N]; + std::memcpy(old_a, a, sizeof(a)); + T old_b[N]; + std::memcpy(old_b, b, sizeof(b)); + { + sycl::buffer buf_a{a, N}; + sycl::buffer buf_b{b, N}; + q.submit([&](sycl::handler &cgh) { + sycl::accessor acc_a{buf_a, cgh}; + sycl::accessor acc_b{buf_b, cgh}; + syclexp::work_group_memory temp{cgh}; + cgh.single_task([=]() { + for (int i = 0; i < N; ++i) { + temp[i] = acc_a[i]; + acc_a[i] = acc_b[i]; + acc_b[i] = temp[i]; } -// Two swaps same as ono swap -for (int i = 0; i < N; ++i) { -assert(a[i] == old_a[i] && b[i] == old_b[i] && "Incorrect swap!"); -} + }); + }); + } + for (int i = 0; i < N; ++i) { + assert(a[i] == old_b[i] && b[i] == old_a[i] && "Incorrect swap!"); + } -// Same as above but use a pointer returned by the address-of operator instead. -{ - sycl::buffer buf_a{ a, N}; - sycl::buffer buf_b{ b, N}; - q.submit([&](sycl::handler &cgh) { - sycl::accessor acc_a{ buf_a, cgh }; - sycl::accessor acc_b { buf_b, cgh }; - syclexp::work_group_memory temp{ cgh }; - cgh.single_task([=]() { -auto ptr = &temp; -for (int i= 0; i < N; ++i) { - (*ptr)[i] = acc_a[i]; - acc_a[i] = acc_b[i]; - acc_b[i] = (*ptr)[i]; -} - });}); + // Instead of working with the temporary work group memory object, we retrieve + // its corresponding multi-pointer and work with it instead. 
+ { + sycl::buffer buf_a{a, N}; + sycl::buffer buf_b{b, N}; + q.submit([&](sycl::handler &cgh) { + sycl::accessor acc_a{buf_a, cgh}; + sycl::accessor acc_b{buf_b, cgh}; + syclexp::work_group_memory temp{cgh}; + cgh.single_task([=]() { + auto ptr = temp.get_multi_ptr(); + for (int i = 0; i < N; ++i) { + ptr[i] = acc_a[i]; + acc_a[i] = acc_b[i]; + acc_b[i] = ptr[i]; } -// Three swaps same as one swap -for (int i = 0; i < N; ++i) { - assert(a[i] == old_b[i] && b[i] == old_a[i] && "Incorrect swap!"); + }); + }); + } + // Two swaps same as no swap + for (int i = 0; i < N; ++i) { + assert(a[i] == old_a[i] && b[i] == old_b[i] && "Incorrect swap!"); + } + // Same as above but use a pointer returned by the address-of operator + // instead. + { + sycl::buffer buf_a{a, N}; + sycl::buffer buf_b{b, N}; + q.submit([&](sycl::handler &cgh) { + sycl::accessor acc_a{buf_a, cgh}; + sycl::accessor acc_b{buf_b, cgh}; + syclexp::work_group_memory temp{cgh}; + cgh.single_task([=]() { + auto ptr = &temp; + for (int i = 0; i < N; ++i) { + (*ptr)[i] = acc_a[i]; + acc_a[i] = acc_b[i]; + acc_b[i] = (*ptr)[i]; + } + }); + }); + } + // Three swaps same as one swap + for (int i = 0; i < N; ++i) { + assert(a[i] == old_b[i] && b[i] == old_a[i] && "Incorrect swap!"); + } } -} - -template +template void swap_array_2d(T (&a)[N][M], T (&b)[N][M]) { -sycl::queue q; - T old_a[N][M]; -for (int i = 0; i < N; ++i) { - std::memcpy(old_a[i], a[i], sizeof(a[0])); -} - T old_b[N][M]; -for (int i = 0; i < N; ++i) { + sycl::queue q; + T old_a[N][M]; + for (int i = 0; i < N; ++i) { + std::memcpy(old_a[i], a[i], sizeof(a[0])); + } + T old_b[N][M]; + for (int i = 0; i < N; ++i) { - std::memcpy(old_b[i], b[i], sizeof(b[0])); -} - { - sycl::buffer buf_a{ a[0], sycl::range{N, M}}; - sycl::buffer buf_b{ b[0], sycl::range{N, M}}; - q.submit([&](sycl::handler &cgh) { - sycl::accessor acc_a{ buf_a, cgh }; - sycl::accessor acc_b { buf_b, cgh }; - syclexp::work_group_memory temp{ cgh }; - cgh.single_task([=]() { -for (int 
i= 0; i < N; ++i) { -for (int j = 0; j < M; ++j) { - temp[i][j]= acc_a[i][j]; - acc_a[i][j] = acc_b[i][j]; - acc_b[i][j] = temp[i][j]; -} -} - });}); + std::memcpy(old_b[i], b[i], sizeof(b[0])); + } + { + sycl::buffer buf_a{a[0], sycl::range{N, M}}; + sycl::buffer buf_b{b[0], sycl::range{N, M}}; + q.submit([&](sycl::handler &cgh) { + sycl::accessor acc_a{buf_a, cgh}; + sycl::accessor acc_b{buf_b, cgh}; + syclexp::work_group_memory temp{cgh}; + cgh.single_task([=]() { + for (int i = 0; i < N; ++i) { + for (int j = 0; j < M; ++j) { + temp[i][j] = acc_a[i][j]; + acc_a[i][j] = acc_b[i][j]; + acc_b[i][j] = temp[i][j]; + } } -for (int i = 0; i < N; ++i) { -for (int j = 0; j < M; ++j) { - assert(a[i][j] == old_b[i][j] && b[i][j] == old_a[i][j] && "Incorrect swap!"); -} -} + }); + }); + } + for (int i = 0; i < N; ++i) { + for (int j = 0; j < M; ++j) { + assert(a[i][j] == old_b[i][j] && b[i][j] == old_a[i][j] && + "Incorrect swap!"); + } + } -// Perform the swap but this time use two temporary work group memory objects. -// One will save the value of acc_a and the other will be copy-assigned from it -// and will be used to write the values back to acc_b. -{ -sycl::buffer buf_a{ a[0], sycl::range{N, M}}; - sycl::buffer buf_b{ b[0], sycl::range{N, M}}; - q.submit([&](sycl::handler &cgh) { - sycl::accessor acc_a{ buf_a, cgh }; - sycl::accessor acc_b { buf_b, cgh }; - syclexp::work_group_memory temp{ cgh }; - syclexp::work_group_memory temp2{ cgh }; - cgh.single_task([=]() { -for (int i= 0; i < N; ++i) { -for (int j = 0; j < M; ++j) { - temp[i][j]= acc_a[i][j]; - acc_a[i][j] = acc_b[i][j]; -} -} -syclexp::work_group_memory temp2; -temp2 = temp; -for (int i = 0; i < N; ++i) { -for (int j = 0; j < M; ++j) { - acc_b[i][j] = temp2[i][j]; -} -} - });}); + // Perform the swap but this time use two temporary work group memory objects. + // One will save the value of acc_a and the other will be copy-assigned from + // it and will be used to write the values back to acc_b. 
+ { + sycl::buffer buf_a{a[0], sycl::range{N, M}}; + sycl::buffer buf_b{b[0], sycl::range{N, M}}; + q.submit([&](sycl::handler &cgh) { + sycl::accessor acc_a{buf_a, cgh}; + sycl::accessor acc_b{buf_b, cgh}; + syclexp::work_group_memory temp{cgh}; + syclexp::work_group_memory temp2{cgh}; + cgh.single_task([=]() { + for (int i = 0; i < N; ++i) { + for (int j = 0; j < M; ++j) { + temp[i][j] = acc_a[i][j]; + acc_a[i][j] = acc_b[i][j]; + } } -for (int i = 0; i < N; ++i) { -for (int j = 0; j < M; ++j) { -// Two swaps are the same as no swap - assert(a[i][j] == old_a[i][j] && b[i][j] == old_b[i][j] && "Incorrect swap!"); -} -} + syclexp::work_group_memory temp2; + temp2 = temp; + for (int i = 0; i < N; ++i) { + for (int j = 0; j < M; ++j) { + acc_b[i][j] = temp2[i][j]; + } + } + }); + }); + } + for (int i = 0; i < N; ++i) { + for (int j = 0; j < M; ++j) { + // Two swaps are the same as no swap + assert(a[i][j] == old_a[i][j] && b[i][j] == old_b[i][j] && + "Incorrect swap!"); + } + } -// Same as above but construct the second temporary inside the kernel and copy-construct it from the first temporary. -{ -sycl::buffer buf_a{ a[0], sycl::range{N, M}}; - sycl::buffer buf_b{ b[0], sycl::range{N, M}}; - q.submit([&](sycl::handler &cgh) { - sycl::accessor acc_a{ buf_a, cgh }; - sycl::accessor acc_b { buf_b, cgh }; - syclexp::work_group_memory temp{ cgh }; - syclexp::work_group_memory temp2{ cgh }; - cgh.single_task([=]() { -for (int i= 0; i < N; ++i) { -for (int j = 0; j < M; ++j) { - temp[i][j]= acc_a[i][j]; - acc_a[i][j] = acc_b[i][j]; -} -} -syclexp::work_group_memory temp2{ temp }; -for (int i = 0; i < N; ++i) { -for (int j = 0; j < M; ++j) { - acc_b[i][j] = temp2[i][j]; -} -} - });}); + // Same as above but construct the second temporary inside the kernel and + // copy-construct it from the first temporary. 
+ { + sycl::buffer buf_a{a[0], sycl::range{N, M}}; + sycl::buffer buf_b{b[0], sycl::range{N, M}}; + q.submit([&](sycl::handler &cgh) { + sycl::accessor acc_a{buf_a, cgh}; + sycl::accessor acc_b{buf_b, cgh}; + syclexp::work_group_memory temp{cgh}; + syclexp::work_group_memory temp2{cgh}; + cgh.single_task([=]() { + for (int i = 0; i < N; ++i) { + for (int j = 0; j < M; ++j) { + temp[i][j] = acc_a[i][j]; + acc_a[i][j] = acc_b[i][j]; + } } -for (int i = 0; i < N; ++i) { -for (int j = 0; j < M; ++j) { -// Three swaps are the same as one swap - assert(a[i][j] == old_b[i][j] && b[i][j] == old_a[i][j] && "Incorrect swap!"); -} + syclexp::work_group_memory temp2{temp}; + for (int i = 0; i < N; ++i) { + for (int j = 0; j < M; ++j) { + acc_b[i][j] = temp2[i][j]; + } + } + }); + }); + } + for (int i = 0; i < N; ++i) { + for (int j = 0; j < M; ++j) { + // Three swaps are the same as one swap + assert(a[i][j] == old_b[i][j] && b[i][j] == old_a[i][j] && + "Incorrect swap!"); + } + } } -} constexpr size_t N = 100; constexpr size_t M = 100; int main() { - int intarr1[N][M]; - int intarr2[N][M]; - float floatarr1[N][M]; - float floatarr2[N][M]; - for (int i = 0; i < N; ++i) { - for (int j = 0; j < M; ++j) { - intarr1[i][j] = i + j; - intarr2[i][j] = i * j; - floatarr1[i][j] = (i + 1) / (j + 1); - floatarr2[i][j] = (j + 1) / (i + 1); -} -} -for (int i = 0; i < N; ++i) { - for (int j = 0; j < M; ++j) { - swap_scalar(intarr1[i][j], intarr2[i][j]); - swap_scalar(floatarr1[i][j], floatarr2[i][j]); -} -swap_array_1d(intarr1[i], intarr2[i]); -swap_array_1d(floatarr1[i], floatarr2[i]); -} -swap_array_2d(intarr1, intarr2); -swap_array_2d(floatarr1, floatarr2); -return 0; + int intarr1[N][M]; + int intarr2[N][M]; + float floatarr1[N][M]; + float floatarr2[N][M]; + for (int i = 0; i < N; ++i) { + for (int j = 0; j < M; ++j) { + intarr1[i][j] = i + j; + intarr2[i][j] = i * j; + floatarr1[i][j] = (i + 1) / (j + 1); + floatarr2[i][j] = (j + 1) / (i + 1); + } + } + for (int i = 0; i < N; ++i) { + 
for (int j = 0; j < M; ++j) { + swap_scalar(intarr1[i][j], intarr2[i][j]); + swap_scalar(floatarr1[i][j], floatarr2[i][j]); + } + swap_array_1d(intarr1[i], intarr2[i]); + swap_array_1d(floatarr1[i], floatarr2[i]); + } + swap_array_2d(intarr1, intarr2); + swap_array_2d(floatarr1, floatarr2); + return 0; } diff --git a/sycl/test-e2e/WorkGroupMemory/work_group_memory_sanity_test.cpp b/sycl/test-e2e/WorkGroupMemory/work_group_memory_sanity_test.cpp index f1d37d00272b4..7e53e61c54686 100644 --- a/sycl/test-e2e/WorkGroupMemory/work_group_memory_sanity_test.cpp +++ b/sycl/test-e2e/WorkGroupMemory/work_group_memory_sanity_test.cpp @@ -5,7 +5,8 @@ #include #include -// Sanity test that checks to see if idiomatic code involving work_group_memory objects compiles and runs with no errors. +// Sanity test that checks to see if idiomatic code involving work_group_memory +// objects compiles and runs with no errors. namespace syclex = sycl::ext::oneapi::experimental; sycl::queue global_q; @@ -18,9 +19,7 @@ struct point { int y; }; -void simple_inc(const syclex::work_group_memory& mem) { - mem++; -} +void simple_inc(const syclex::work_group_memory &mem) { mem++; } void fancy_inc(syclex::work_group_memory mem) { syclex::work_group_memory t = mem; @@ -35,129 +34,134 @@ void test_breadth() { int *res = sycl::malloc_host(16, q); q.submit([&](sycl::handler &cgh) { - syclex::work_group_memory mem1{cgh}; - syclex::work_group_memory mem2{cgh}; - syclex::work_group_memory mem3{cgh}; - syclex::work_group_memory mem4{5, cgh}; - syclex::work_group_memory mem5{2, cgh}; - syclex::work_group_memory mem6{2, cgh}; - syclex::work_group_memory mem7{cgh}; - syclex::work_group_memory mem8{2, cgh}; - - cgh.single_task([=] { - // Operations on scalar - ++mem1; - mem1++; - mem1 += 1; - mem1 = mem1 + 1; - int *p1 = &mem1; (*p1)++; - simple_inc(mem1); - fancy_inc(mem1); - res[0] = *(mem1.get_multi_ptr()); - res[1] = mem1; - - // Operations on bounded array - mem2[4] = mem2[4] + 1; - int (*p2)[10] = &mem2; 
(*p2)[4]++; - res[2] = mem2.get_multi_ptr()[4]; - res[3] = mem2[4]; - - mem3[4] = mem3[4] + 1; - int (*p3)[10] = &mem3; (*p3)[4]++; - res[4] = mem3.get_multi_ptr()[4]; - res[5] = mem3[4]; - - // Operations on unbounded array - mem4[4] = mem4[4] + 1; - int (*p4)[] = &mem4; (*p4)[4]++; - res[6] = mem4.get_multi_ptr()[4]; - res[7] = mem4[4]; - - // Operations on unbounded multi-dimensional array - mem5[1][5] = mem5[1][5] + 1; - mem5[1][7] = mem5[1][7] + 1; - res[8] = mem5.get_multi_ptr()[10 + 5]; - res[9] = mem5[1][7]; - - mem6[1][5] = mem6[1][5] + 1; - mem6[1][7] = mem6[1][7] + 1; - res[10] = mem6.get_multi_ptr()[10 + 5]; - res[11] = mem6[1][7]; - - // Operations on scalar struct - (&mem7)->x++; - (&mem7)->y += 1; - point pnt = mem7; - pnt.x++; - pnt.y++; - mem7 = pnt; - res[12] = (&mem7)->x; - res[13] = (&mem7)->y; - - // Operations on unbounded multi-dimensional array of struct - mem8[1][5].x++; - mem8[1][5].y += 1; - res[14] = mem8.get_multi_ptr()[10 + 5].x; - res[15] = mem8[1][5].y; - }); - }).wait(); + syclex::work_group_memory mem1{cgh}; + syclex::work_group_memory mem2{cgh}; + syclex::work_group_memory mem3{cgh}; + syclex::work_group_memory mem4{5, cgh}; + syclex::work_group_memory mem5{2, cgh}; + syclex::work_group_memory mem6{2, cgh}; + syclex::work_group_memory mem7{cgh}; + syclex::work_group_memory mem8{2, cgh}; + + cgh.single_task([=] { + // Operations on scalar + ++mem1; + mem1++; + mem1 += 1; + mem1 = mem1 + 1; + int *p1 = &mem1; + (*p1)++; + simple_inc(mem1); + fancy_inc(mem1); + res[0] = *(mem1.get_multi_ptr()); + res[1] = mem1; + + // Operations on bounded array + mem2[4] = mem2[4] + 1; + int(*p2)[10] = &mem2; + (*p2)[4]++; + res[2] = mem2.get_multi_ptr()[4]; + res[3] = mem2[4]; + + mem3[4] = mem3[4] + 1; + int(*p3)[10] = &mem3; + (*p3)[4]++; + res[4] = mem3.get_multi_ptr()[4]; + res[5] = mem3[4]; + + // Operations on unbounded array + mem4[4] = mem4[4] + 1; + int(*p4)[] = &mem4; + (*p4)[4]++; + res[6] = mem4.get_multi_ptr()[4]; + res[7] = mem4[4]; + 
+ // Operations on unbounded multi-dimensional array + mem5[1][5] = mem5[1][5] + 1; + mem5[1][7] = mem5[1][7] + 1; + res[8] = mem5.get_multi_ptr()[10 + 5]; + res[9] = mem5[1][7]; + + mem6[1][5] = mem6[1][5] + 1; + mem6[1][7] = mem6[1][7] + 1; + res[10] = mem6.get_multi_ptr()[10 + 5]; + res[11] = mem6[1][7]; + + // Operations on scalar struct + (&mem7)->x++; + (&mem7)->y += 1; + point pnt = mem7; + pnt.x++; + pnt.y++; + mem7 = pnt; + res[12] = (&mem7)->x; + res[13] = (&mem7)->y; + + // Operations on unbounded multi-dimensional array of struct + mem8[1][5].x++; + mem8[1][5].y += 1; + res[14] = mem8.get_multi_ptr()[10 + 5].x; + res[15] = mem8[1][5].y; + }); + }).wait(); } void test_basic() { sycl::queue q; q.submit([&](sycl::handler &cgh) { - // Allocate one element for each work-item in the work-group. - syclex::work_group_memory mem{cgh}; + // Allocate one element for each work-item in the work-group. + syclex::work_group_memory mem{cgh}; - sycl::nd_range ndr{{SIZE}, {WGSIZE}}; - cgh.parallel_for(ndr, [=](sycl::nd_item<> it) { - size_t id = it.get_local_linear_id(); + sycl::nd_range ndr{{SIZE}, {WGSIZE}}; + cgh.parallel_for(ndr, [=](sycl::nd_item<> it) { + size_t id = it.get_local_linear_id(); - // Each work-item has its own dedicated element of the array. - mem[id] = 0; - }); - }).wait(); + // Each work-item has its own dedicated element of the array. 
+ mem[id] = 0; + }); + }).wait(); } void test_operations() { sycl::queue q; q.submit([&](sycl::handler &cgh) { - syclex::work_group_memory mem1{cgh}; // scalar - syclex::work_group_memory mem2{cgh}; // bounded array - syclex::work_group_memory mem3{5, cgh}; // unbounded array - syclex::work_group_memory mem4{2, cgh}; // multi-dimensional array - syclex::work_group_memory mem5{cgh}; // array of struct - - sycl::nd_range ndr{{SIZE}, {WGSIZE}}; - cgh.parallel_for(ndr, [=](sycl::nd_item<> it) { - if (it.get_group().leader()) { - // A "work_group_memory" templated on a scalar type acts much like the - // enclosed scalar type. - ++mem1; - mem1++; - mem1 += 1; - mem1 = mem1 + 1; - int *p1 = &mem1; - - // A "work_group_memory" templated on an array type (either bounded or - // unbounded) acts like an array. - ++mem2[4]; - mem2[4]++; - mem2[4] = mem2[4] + 1; - int *p2 = &mem2[4]; - - // A multi-dimensional array works as expected. - mem4[1][5] = mem4[1][5] + 1; - mem4[1][7] = mem4[1][7] + 1; - - // An array of structs works as expected too. - mem5[1].x++; - mem5[1].y = mem5[1].y + 1; - } - }); - }).wait(); + syclex::work_group_memory mem1{cgh}; // scalar + syclex::work_group_memory mem2{cgh}; // bounded array + syclex::work_group_memory mem3{5, cgh}; // unbounded array + syclex::work_group_memory mem4{2, + cgh}; // multi-dimensional array + syclex::work_group_memory mem5{cgh}; // array of struct + + sycl::nd_range ndr{{SIZE}, {WGSIZE}}; + cgh.parallel_for(ndr, [=](sycl::nd_item<> it) { + if (it.get_group().leader()) { + // A "work_group_memory" templated on a scalar type acts much like the + // enclosed scalar type. + ++mem1; + mem1++; + mem1 += 1; + mem1 = mem1 + 1; + int *p1 = &mem1; + + // A "work_group_memory" templated on an array type (either bounded or + // unbounded) acts like an array. + ++mem2[4]; + mem2[4]++; + mem2[4] = mem2[4] + 1; + int *p2 = &mem2[4]; + + // A multi-dimensional array works as expected. 
+ mem4[1][5] = mem4[1][5] + 1; + mem4[1][7] = mem4[1][7] + 1; + + // An array of structs works as expected too. + mem5[1].x++; + mem5[1].y = mem5[1].y + 1; + } + }); + }).wait(); } int main() { From 3228aebf83a8de26602df880764a6d59b132b8c3 Mon Sep 17 00:00:00 2001 From: "Bushi, Lorenc" Date: Fri, 27 Sep 2024 18:32:09 +0200 Subject: [PATCH 034/107] Revamp tests for work group memory extension --- .../oneapi/experimental/work_group_memory.hpp | 27 +-- sycl/include/sycl/handler.hpp | 15 +- .../WorkGroupMemory/UR_argument_test.cpp | 20 ++ .../swap_test_using_work_group_memory.cpp | 220 +++++++++--------- .../WorkGroupMemory/unbounded_array_test.cpp | 23 ++ .../work_group_memory_invalid_usage_test.cpp | 38 +++ 6 files changed, 213 insertions(+), 130 deletions(-) create mode 100644 sycl/test-e2e/WorkGroupMemory/UR_argument_test.cpp create mode 100644 sycl/test-e2e/WorkGroupMemory/unbounded_array_test.cpp create mode 100644 sycl/test-e2e/WorkGroupMemory/work_group_memory_invalid_usage_test.cpp diff --git a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp index 5e10bece6a317..8644806ea9572 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp @@ -35,20 +35,6 @@ class work_group_memory_impl { inline size_t getWorkGroupMemoryOwnSize(detail::work_group_memory_impl *wgm) { return wgm->wgm_size; } - -// The following 3 functions help us get the address of the first element of a -// multi-dimensional array, be it bounded or unbounded. A scalar is also -// included. In that case, it just returns the address of the scalar. 
-template auto getData(DataT &scalar) { return &scalar; } - -template auto getData(DataT (&bounded_arr)[N]) { - return getData(bounded_arr[0]); -} - -template auto getData(DataT (&unbounded_arr)[]) { - return getData(unbounded_arr[0]); -} - } // namespace detail namespace ext::oneapi::experimental { @@ -60,7 +46,7 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory private: using decoratedPtr = typename sycl::detail::DecoratedType< - DataT, access::address_space::local_space>::type *; + value_type, access::address_space::local_space>::type *; public: work_group_memory() = default; @@ -68,12 +54,12 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory work_group_memory &operator=(const work_group_memory &rhs) = default; template >> - work_group_memory(handler &) + work_group_memory(handler &cgh) : sycl::detail::work_group_memory_impl(sizeof(work_group_memory), sizeof(DataT)) {} template >> - work_group_memory(size_t num, handler &) + work_group_memory(size_t num, handler &cgh) : sycl::detail::work_group_memory_impl( sizeof(work_group_memory), num * sizeof(std::remove_extent_t)) {} @@ -81,11 +67,10 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory multi_ptr get_multi_ptr() const { return sycl::address_space_cast( - sycl::detail::getData(*ptr)); + IsDecorated, value_type>(ptr); } - DataT *operator&() const { return ptr; } - operator DataT &() const { return *(this->operator&()); } + DataT *operator&() const { return reinterpret_cast(ptr); } + operator DataT &() const { return *reinterpret_cast(ptr); } template >> const work_group_memory &operator=(const DataT &value) const { diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 46a134231686a..987e4ac6cea3a 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -539,8 +539,10 @@ class __SYCL_EXPORT handler { /// According to section 4.7.6.11. 
of the SYCL specification, a local accessor /// must not be used in a SYCL kernel function that is invoked via single_task /// or via the simple form of parallel_for that takes a range parameter. + /// The same restriction is in place for work group memory objects. Refer to + /// its spec for more details. template - void throwOnLocalAccessorMisuse() const { + void throwOnKernelParameterMisuse() const { using NameT = typename detail::get_kernel_name_t::name; for (unsigned I = 0; I < detail::getKernelNumParams(); ++I) { @@ -556,6 +558,13 @@ class __SYCL_EXPORT handler { "A local accessor must not be used in a SYCL kernel function " "that is invoked via single_task or via the simple form of " "parallel_for that takes a range parameter."); + if (Kind == detail::kernel_param_kind_t::kind_work_group_memory) + throw sycl::exception( + make_error_code(errc::kernel_argument), + "A work group memory object must not be used in a SYCL kernel " + "function that is invoked via single_task or via the simple form " + "of " + "parallel_for that takes a range parameter."); } } @@ -1331,7 +1340,7 @@ class __SYCL_EXPORT handler { void parallel_for_lambda_impl(range UserRange, PropertiesT Props, KernelType KernelFunc) { throwIfActionIsCreated(); - throwOnLocalAccessorMisuse(); + throwOnKernelParameterMisuse(); if (!range_size_fits_in_size_t(UserRange)) throw sycl::exception(make_error_code(errc::runtime), "The total number of work-items in " @@ -1856,7 +1865,7 @@ class __SYCL_EXPORT handler { _KERNELFUNCPARAM(KernelFunc)) { (void)Props; throwIfActionIsCreated(); - throwOnLocalAccessorMisuse(); + throwOnKernelParameterMisuse(); // TODO: Properties may change the kernel function, so in order to avoid // conflicts they should be included in the name. 
using NameT = diff --git a/sycl/test-e2e/WorkGroupMemory/UR_argument_test.cpp b/sycl/test-e2e/WorkGroupMemory/UR_argument_test.cpp new file mode 100644 index 0000000000000..82fa25cf62549 --- /dev/null +++ b/sycl/test-e2e/WorkGroupMemory/UR_argument_test.cpp @@ -0,0 +1,20 @@ +// RUN: %{build} -o %t.out +// RUN: env SYCL_UR_TRACE=2 %{run} %t.out | FileCheck %s + +#include +#include + +// Check that the work group memory object is mapped to exactly one backend +// kernel argument. + +int main() { + sycl::queue q; + q.submit([&](sycl::handler &cgh) { + sycl::ext::oneapi::experimental::work_group_memory data{cgh}; + cgh.parallel_for(sycl::nd_range<1>{1, 1}, + [=](sycl::nd_item<1> it) { data[0] = 42; }); + }); +} + +// CHECK-COUNT-1: ---> urKernelSetArg +// CHECK-NOT: ---> urKernelSetArg diff --git a/sycl/test-e2e/WorkGroupMemory/swap_test_using_work_group_memory.cpp b/sycl/test-e2e/WorkGroupMemory/swap_test_using_work_group_memory.cpp index 00b284b7017ee..080e551e622e2 100644 --- a/sycl/test-e2e/WorkGroupMemory/swap_test_using_work_group_memory.cpp +++ b/sycl/test-e2e/WorkGroupMemory/swap_test_using_work_group_memory.cpp @@ -1,34 +1,25 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out - #include #include #include #include - +#include +#include namespace syclexp = sycl::ext::oneapi::experimental; +sycl::queue q; + // This test performs a swap of two scalars/arrays inside a kernel using a // work_group_memory object as a temporary buffer. The test is done for scalar // types and bounded arrays. After the kernel finishes, it is verified on the // host side that the swap worked. -// One important note is that for unbounded arrays, the feature is unstable. -// Specifically, the code may or may not compile when kernels reference work -// group memory objects that have been constructed with the unbounded array -// type. This is due to a limitation of SPIRV where it does not allow arrays of -// length zero. 
For example, an unbounded array may be translated to an array of -// length zero in LLVM IR and during the LLVM IR -> SPIRV translation phase, the -// translator rejects all arrays of length zero because they are invalid -// constructs in SPIRV. As a result of this, unbounded arrays do not appear in -// this test. They do appear in the sanity test though in this directory because -// there the unbounded arrays are used with concrete subscript indices which -// seems to work, for now at least. - template void swap_scalar(T &a, T &b) { - sycl::queue q; const T old_a = a; const T old_b = b; + const size_t size = 1; + const size_t wgsize = 1; { sycl::buffer buf_a{&a, 1}; sycl::buffer buf_b{&b, 1}; @@ -36,7 +27,8 @@ template void swap_scalar(T &a, T &b) { sycl::accessor acc_a{buf_a, cgh}; sycl::accessor acc_b{buf_b, cgh}; syclexp::work_group_memory temp{cgh}; - cgh.single_task([=]() { + sycl::nd_range<1> ndr{size, wgsize}; + cgh.parallel_for(ndr, [=](sycl::nd_item<1> it) { temp = acc_a[0]; acc_a[0] = acc_b[0]; acc_b[0] = temp; @@ -56,8 +48,9 @@ template void swap_scalar(T &a, T &b) { sycl::accessor acc_a{buf_a, cgh}; sycl::accessor acc_b{buf_b, cgh}; syclexp::work_group_memory temp{cgh}; - syclexp::work_group_memory temp2; - cgh.single_task([=]() { + sycl::nd_range<1> ndr{size, wgsize}; + cgh.parallel_for(ndr, [=](sycl::nd_item<1> it) { + syclexp::work_group_memory temp2; temp2 = temp; // temp and temp2 have the same underlying data temp = acc_a[0]; acc_a[0] = acc_b[0]; @@ -80,7 +73,8 @@ template void swap_scalar(T &a, T &b) { sycl::accessor acc_b{buf_b, cgh}; syclexp::work_group_memory temp{cgh}; syclexp::work_group_memory temp2{cgh}; - cgh.single_task([=]() { + sycl::nd_range<1> ndr{size, wgsize}; + cgh.parallel_for(ndr, [=](sycl::nd_item<> it) { temp = acc_a[0]; acc_a[0] = acc_b[0]; temp2 = *(temp.get_multi_ptr()); // temp2 now has the same value as temp @@ -101,7 +95,8 @@ template void swap_scalar(T &a, T &b) { sycl::accessor acc_b{buf_b, cgh}; 
syclexp::work_group_memory temp{cgh}; syclexp::work_group_memory temp2{cgh}; - cgh.single_task([=]() { + sycl::nd_range<1> ndr{size, wgsize}; + cgh.parallel_for(ndr, [=](sycl::nd_item<> it) { temp = acc_a[0]; acc_a[0] = acc_b[0]; temp2 = *(&temp); @@ -113,12 +108,17 @@ template void swap_scalar(T &a, T &b) { assert(a == old_a && b == old_b && "Incorrect swap!"); } -template void swap_array_1d(T (&a)[N], T (&b)[N]) { +// Swap two 1d arrays in batches of size batch_size where each batch will be +// swapped by items in the same work group. +template +void swap_array_1d(T (&a)[N], T (&b)[N], size_t batch_size) { sycl::queue q; T old_a[N]; std::memcpy(old_a, a, sizeof(a)); T old_b[N]; std::memcpy(old_b, b, sizeof(b)); + const size_t size = N; + const size_t wgsize = batch_size; { sycl::buffer buf_a{a, N}; sycl::buffer buf_b{b, N}; @@ -126,12 +126,12 @@ template void swap_array_1d(T (&a)[N], T (&b)[N]) { sycl::accessor acc_a{buf_a, cgh}; sycl::accessor acc_b{buf_b, cgh}; syclexp::work_group_memory temp{cgh}; - cgh.single_task([=]() { - for (int i = 0; i < N; ++i) { - temp[i] = acc_a[i]; - acc_a[i] = acc_b[i]; - acc_b[i] = temp[i]; - } + sycl::nd_range<1> ndr{size, wgsize}; + cgh.parallel_for(ndr, [=](sycl::nd_item<> it) { + const auto i = it.get_global_id(); + temp[i] = acc_a[i]; + acc_a[i] = acc_b[i]; + acc_b[i] = temp[i]; }); }); } @@ -148,13 +148,13 @@ template void swap_array_1d(T (&a)[N], T (&b)[N]) { sycl::accessor acc_a{buf_a, cgh}; sycl::accessor acc_b{buf_b, cgh}; syclexp::work_group_memory temp{cgh}; - cgh.single_task([=]() { + sycl::nd_range<1> ndr{size, wgsize}; + cgh.parallel_for(ndr, [=](sycl::nd_item<> it) { auto ptr = temp.get_multi_ptr(); - for (int i = 0; i < N; ++i) { - ptr[i] = acc_a[i]; - acc_a[i] = acc_b[i]; - acc_b[i] = ptr[i]; - } + const auto i = it.get_global_id(); + ptr[i] = acc_a[i]; + acc_a[i] = acc_b[i]; + acc_b[i] = ptr[i]; }); }); } @@ -172,13 +172,13 @@ template void swap_array_1d(T (&a)[N], T (&b)[N]) { sycl::accessor acc_a{buf_a, cgh}; 
sycl::accessor acc_b{buf_b, cgh}; syclexp::work_group_memory temp{cgh}; - cgh.single_task([=]() { + sycl::nd_range<1> ndr{size, wgsize}; + cgh.parallel_for(ndr, [=](sycl::nd_item<> it) { + const auto i = it.get_global_id(); auto ptr = &temp; - for (int i = 0; i < N; ++i) { - (*ptr)[i] = acc_a[i]; - acc_a[i] = acc_b[i]; - acc_b[i] = (*ptr)[i]; - } + (*ptr)[i] = acc_a[i]; + acc_a[i] = acc_b[i]; + acc_b[i] = (*ptr)[i]; }); }); } @@ -188,38 +188,39 @@ template void swap_array_1d(T (&a)[N], T (&b)[N]) { } } -template -void swap_array_2d(T (&a)[N][M], T (&b)[N][M]) { +template +void swap_array_2d(T (&a)[N][N], T (&b)[N][N], size_t batch_size) { sycl::queue q; - T old_a[N][M]; + T old_a[N][N]; for (int i = 0; i < N; ++i) { std::memcpy(old_a[i], a[i], sizeof(a[0])); } - T old_b[N][M]; + T old_b[N][N]; for (int i = 0; i < N; ++i) { std::memcpy(old_b[i], b[i], sizeof(b[0])); } + const auto size = sycl::range{N, N}; + const auto wgsize = sycl::range{batch_size, batch_size}; { - sycl::buffer buf_a{a[0], sycl::range{N, M}}; - sycl::buffer buf_b{b[0], sycl::range{N, M}}; + sycl::buffer buf_a{a[0], sycl::range{N, N}}; + sycl::buffer buf_b{b[0], sycl::range{N, N}}; q.submit([&](sycl::handler &cgh) { sycl::accessor acc_a{buf_a, cgh}; sycl::accessor acc_b{buf_b, cgh}; - syclexp::work_group_memory temp{cgh}; - cgh.single_task([=]() { - for (int i = 0; i < N; ++i) { - for (int j = 0; j < M; ++j) { - temp[i][j] = acc_a[i][j]; - acc_a[i][j] = acc_b[i][j]; - acc_b[i][j] = temp[i][j]; - } - } + syclexp::work_group_memory temp{cgh}; + sycl::nd_range<2> ndr{size, wgsize}; + cgh.parallel_for(ndr, [=](sycl::nd_item<2> it) { + const auto i = it.get_global_id()[0]; + const auto j = it.get_global_id()[1]; + temp[i][j] = acc_a[i][j]; + acc_a[i][j] = acc_b[i][j]; + acc_b[i][j] = temp[i][j]; }); }); } for (int i = 0; i < N; ++i) { - for (int j = 0; j < M; ++j) { + for (int j = 0; j < N; ++j) { assert(a[i][j] == old_b[i][j] && b[i][j] == old_a[i][j] && "Incorrect swap!"); } @@ -229,32 +230,26 @@ 
void swap_array_2d(T (&a)[N][M], T (&b)[N][M]) { // One will save the value of acc_a and the other will be copy-assigned from // it and will be used to write the values back to acc_b. { - sycl::buffer buf_a{a[0], sycl::range{N, M}}; - sycl::buffer buf_b{b[0], sycl::range{N, M}}; + sycl::buffer buf_a{a[0], sycl::range{N, N}}; + sycl::buffer buf_b{b[0], sycl::range{N, N}}; q.submit([&](sycl::handler &cgh) { sycl::accessor acc_a{buf_a, cgh}; sycl::accessor acc_b{buf_b, cgh}; - syclexp::work_group_memory temp{cgh}; - syclexp::work_group_memory temp2{cgh}; - cgh.single_task([=]() { - for (int i = 0; i < N; ++i) { - for (int j = 0; j < M; ++j) { - temp[i][j] = acc_a[i][j]; - acc_a[i][j] = acc_b[i][j]; - } - } - syclexp::work_group_memory temp2; + syclexp::work_group_memory temp{cgh}; + sycl::nd_range<2> ndr{size, wgsize}; + cgh.parallel_for(ndr, [=](sycl::nd_item<2> it) { + const auto i = it.get_global_id()[0]; + const auto j = it.get_global_id()[1]; + temp[i][j] = acc_a[i][j]; + acc_a[i][j] = acc_b[i][j]; + syclexp::work_group_memory temp2; temp2 = temp; - for (int i = 0; i < N; ++i) { - for (int j = 0; j < M; ++j) { - acc_b[i][j] = temp2[i][j]; - } - } + acc_b[i][j] = temp2[i][j]; }); }); } for (int i = 0; i < N; ++i) { - for (int j = 0; j < M; ++j) { + for (int j = 0; j < N; ++j) { // Two swaps are the same as no swap assert(a[i][j] == old_a[i][j] && b[i][j] == old_b[i][j] && "Incorrect swap!"); @@ -264,31 +259,25 @@ void swap_array_2d(T (&a)[N][M], T (&b)[N][M]) { // Same as above but construct the second temporary inside the kernel and // copy-construct it from the first temporary. 
{ - sycl::buffer buf_a{a[0], sycl::range{N, M}}; - sycl::buffer buf_b{b[0], sycl::range{N, M}}; + sycl::buffer buf_a{a[0], sycl::range{N, N}}; + sycl::buffer buf_b{b[0], sycl::range{N, N}}; q.submit([&](sycl::handler &cgh) { sycl::accessor acc_a{buf_a, cgh}; sycl::accessor acc_b{buf_b, cgh}; - syclexp::work_group_memory temp{cgh}; - syclexp::work_group_memory temp2{cgh}; - cgh.single_task([=]() { - for (int i = 0; i < N; ++i) { - for (int j = 0; j < M; ++j) { - temp[i][j] = acc_a[i][j]; - acc_a[i][j] = acc_b[i][j]; - } - } - syclexp::work_group_memory temp2{temp}; - for (int i = 0; i < N; ++i) { - for (int j = 0; j < M; ++j) { - acc_b[i][j] = temp2[i][j]; - } - } + syclexp::work_group_memory temp{cgh}; + sycl::nd_range<2> ndr{size, wgsize}; + cgh.parallel_for(ndr, [=](sycl::nd_item<2> it) { + const auto i = it.get_global_id()[0]; + const auto j = it.get_global_id()[1]; + temp[i][j] = acc_a[i][j]; + acc_a[i][j] = acc_b[i][j]; + syclexp::work_group_memory temp2{temp}; + acc_b[i][j] = temp2[i][j]; }); }); } for (int i = 0; i < N; ++i) { - for (int j = 0; j < M; ++j) { + for (int j = 0; j < N; ++j) { // Three swaps are the same as one swap assert(a[i][j] == old_b[i][j] && b[i][j] == old_a[i][j] && "Incorrect swap!"); @@ -296,30 +285,49 @@ void swap_array_2d(T (&a)[N][M], T (&b)[N][M]) { } } -constexpr size_t N = 100; -constexpr size_t M = 100; +// Coherency test that checks that work group memory is truly shared by +// work-items in a work group. The test allocates an integer in +// work group memory and each leader of the work groups will assign +// its work group index to this integer. The computation that the +// leader does yields the same value for all work-items in the work-group +// so we can verify that each work-item sees the value written by its leader. +// The test also is a sanity check that different work groups get different +// work group memory locations as otherwise we'd have data races. 
+void coherency(size_t size, size_t wgsize) { + q.submit([&](sycl::handler &cgh) { + syclexp::work_group_memory data{cgh}; + sycl::nd_range<1> ndr{size, wgsize}; + cgh.parallel_for(ndr, [=](sycl::nd_item<1> it) { + if (it.get_group().leader()) { + data = it.get_global_id() / wgsize; + } + sycl::group_barrier(it.get_group()); + assert(data == it.get_global_id() / wgsize); + }); + }); +} + +constexpr size_t N = 128; int main() { - int intarr1[N][M]; - int intarr2[N][M]; - float floatarr1[N][M]; - float floatarr2[N][M]; + int intarr1[N][N]; + int intarr2[N][N]; for (int i = 0; i < N; ++i) { - for (int j = 0; j < M; ++j) { + for (int j = 0; j < N; ++j) { intarr1[i][j] = i + j; intarr2[i][j] = i * j; - floatarr1[i][j] = (i + 1) / (j + 1); - floatarr2[i][j] = (j + 1) / (i + 1); } } for (int i = 0; i < N; ++i) { - for (int j = 0; j < M; ++j) { + for (int j = 0; j < N; ++j) { swap_scalar(intarr1[i][j], intarr2[i][j]); - swap_scalar(floatarr1[i][j], floatarr2[i][j]); } - swap_array_1d(intarr1[i], intarr2[i]); - swap_array_1d(floatarr1[i], floatarr2[i]); + swap_array_1d(intarr1[i], intarr2[i], 8); } - swap_array_2d(intarr1, intarr2); - swap_array_2d(floatarr1, floatarr2); + swap_array_2d(intarr1, intarr2, 8); + coherency(N, N / 2); + coherency(N, N / 4); + coherency(N, N / 8); + coherency(N, N / 16); + coherency(N, N / 32); return 0; } diff --git a/sycl/test-e2e/WorkGroupMemory/unbounded_array_test.cpp b/sycl/test-e2e/WorkGroupMemory/unbounded_array_test.cpp new file mode 100644 index 0000000000000..d280f82c18e43 --- /dev/null +++ b/sycl/test-e2e/WorkGroupMemory/unbounded_array_test.cpp @@ -0,0 +1,23 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include +#include + +// This test checks that work group memory objects can be used with unbounded +// arrays as template arguments. + +// Unbounded array support is not yet implemented for work group memory +// due to a LLVM IR <-> SPIRV translation problem. 
+// XFAIL: * + +int main() { + sycl::queue q; + q.submit([&](sycl::handler &cgh) { + sycl::ext::oneapi::experimental::work_group_memory data{16, cgh}; + cgh.parallel_for(sycl::nd_range<1>{1, 1}, [=](sycl::nd_item<1> it) { + for (int i = 0; i < 16; ++i) + data[i] = 42; + }); + }); +} diff --git a/sycl/test-e2e/WorkGroupMemory/work_group_memory_invalid_usage_test.cpp b/sycl/test-e2e/WorkGroupMemory/work_group_memory_invalid_usage_test.cpp new file mode 100644 index 0000000000000..3a1609ac21607 --- /dev/null +++ b/sycl/test-e2e/WorkGroupMemory/work_group_memory_invalid_usage_test.cpp @@ -0,0 +1,38 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +#include +#include +using namespace sycl::ext::oneapi::experimental; + +// As per the spec, a work_group_memory object cannot be used in a single task +// kernel or in a sycl::range kernel. An exception with error code +// errc::kernel_argument must be thrown in such cases. This test verifies this. + +int main() { + + sycl::queue q; + try { + q.submit([&](sycl::handler &cgh) { + work_group_memory mem{cgh}; + cgh.single_task([=]() { mem = 42; }); + }); + assert(false && "Work group memory was used in a single_task kernel and an " + "exception was not seen"); // Fail, exception was not seen + } catch (sycl::exception &e) { + // Exception seen but must verify that the error code is correct + assert(e.code() == sycl::errc::kernel_argument); + } + // Same thing but with a range kernel + try { + q.submit([&](sycl::handler &cgh) { + work_group_memory mem{cgh}; + cgh.parallel_for(sycl::range{1}, [=](sycl::id<> it) { mem = 42; }); + }); + assert(false && "Work group memory was used in a range kernel and an " + "exception was not seen"); // Fail, exception was not seen + } catch (sycl::exception &e) { + // Exception seen but must verify that the error code is correct + assert(e.code() == sycl::errc::kernel_argument); + } + return 0; +} From 0e95ee5a3d27114bb61cf87b48ef90604291a284 Mon Sep 17 00:00:00 2001 From: "Bushi, Lorenc" 
Date: Fri, 27 Sep 2024 18:33:51 +0200 Subject: [PATCH 035/107] Remove sanity test --- ...nvalid_usage_test.cpp => invalid_test.cpp} | 0 ...ng_work_group_memory.cpp => swap_test.cpp} | 0 .../work_group_memory_sanity_test.cpp | 171 ------------------ 3 files changed, 171 deletions(-) rename sycl/test-e2e/WorkGroupMemory/{work_group_memory_invalid_usage_test.cpp => invalid_test.cpp} (100%) rename sycl/test-e2e/WorkGroupMemory/{swap_test_using_work_group_memory.cpp => swap_test.cpp} (100%) delete mode 100644 sycl/test-e2e/WorkGroupMemory/work_group_memory_sanity_test.cpp diff --git a/sycl/test-e2e/WorkGroupMemory/work_group_memory_invalid_usage_test.cpp b/sycl/test-e2e/WorkGroupMemory/invalid_test.cpp similarity index 100% rename from sycl/test-e2e/WorkGroupMemory/work_group_memory_invalid_usage_test.cpp rename to sycl/test-e2e/WorkGroupMemory/invalid_test.cpp diff --git a/sycl/test-e2e/WorkGroupMemory/swap_test_using_work_group_memory.cpp b/sycl/test-e2e/WorkGroupMemory/swap_test.cpp similarity index 100% rename from sycl/test-e2e/WorkGroupMemory/swap_test_using_work_group_memory.cpp rename to sycl/test-e2e/WorkGroupMemory/swap_test.cpp diff --git a/sycl/test-e2e/WorkGroupMemory/work_group_memory_sanity_test.cpp b/sycl/test-e2e/WorkGroupMemory/work_group_memory_sanity_test.cpp deleted file mode 100644 index 7e53e61c54686..0000000000000 --- a/sycl/test-e2e/WorkGroupMemory/work_group_memory_sanity_test.cpp +++ /dev/null @@ -1,171 +0,0 @@ -// RUN: %{build} -o %t.out -// RUN: %{run} %t.out - -#include -#include -#include - -// Sanity test that checks to see if idiomatic code involving work_group_memory -// objects compiles and runs with no errors. 
- -namespace syclex = sycl::ext::oneapi::experimental; -sycl::queue global_q; - -constexpr size_t SIZE = 4096; -constexpr size_t WGSIZE = 256; - -struct point { - int x; - int y; -}; - -void simple_inc(const syclex::work_group_memory &mem) { mem++; } - -void fancy_inc(syclex::work_group_memory mem) { - syclex::work_group_memory t = mem; - t = mem; - t++; -} - -void test_breadth() { - sycl::queue q; - global_q = q; - - int *res = sycl::malloc_host(16, q); - - q.submit([&](sycl::handler &cgh) { - syclex::work_group_memory mem1{cgh}; - syclex::work_group_memory mem2{cgh}; - syclex::work_group_memory mem3{cgh}; - syclex::work_group_memory mem4{5, cgh}; - syclex::work_group_memory mem5{2, cgh}; - syclex::work_group_memory mem6{2, cgh}; - syclex::work_group_memory mem7{cgh}; - syclex::work_group_memory mem8{2, cgh}; - - cgh.single_task([=] { - // Operations on scalar - ++mem1; - mem1++; - mem1 += 1; - mem1 = mem1 + 1; - int *p1 = &mem1; - (*p1)++; - simple_inc(mem1); - fancy_inc(mem1); - res[0] = *(mem1.get_multi_ptr()); - res[1] = mem1; - - // Operations on bounded array - mem2[4] = mem2[4] + 1; - int(*p2)[10] = &mem2; - (*p2)[4]++; - res[2] = mem2.get_multi_ptr()[4]; - res[3] = mem2[4]; - - mem3[4] = mem3[4] + 1; - int(*p3)[10] = &mem3; - (*p3)[4]++; - res[4] = mem3.get_multi_ptr()[4]; - res[5] = mem3[4]; - - // Operations on unbounded array - mem4[4] = mem4[4] + 1; - int(*p4)[] = &mem4; - (*p4)[4]++; - res[6] = mem4.get_multi_ptr()[4]; - res[7] = mem4[4]; - - // Operations on unbounded multi-dimensional array - mem5[1][5] = mem5[1][5] + 1; - mem5[1][7] = mem5[1][7] + 1; - res[8] = mem5.get_multi_ptr()[10 + 5]; - res[9] = mem5[1][7]; - - mem6[1][5] = mem6[1][5] + 1; - mem6[1][7] = mem6[1][7] + 1; - res[10] = mem6.get_multi_ptr()[10 + 5]; - res[11] = mem6[1][7]; - - // Operations on scalar struct - (&mem7)->x++; - (&mem7)->y += 1; - point pnt = mem7; - pnt.x++; - pnt.y++; - mem7 = pnt; - res[12] = (&mem7)->x; - res[13] = (&mem7)->y; - - // Operations on unbounded 
multi-dimensional array of struct - mem8[1][5].x++; - mem8[1][5].y += 1; - res[14] = mem8.get_multi_ptr()[10 + 5].x; - res[15] = mem8[1][5].y; - }); - }).wait(); -} - -void test_basic() { - sycl::queue q; - - q.submit([&](sycl::handler &cgh) { - // Allocate one element for each work-item in the work-group. - syclex::work_group_memory mem{cgh}; - - sycl::nd_range ndr{{SIZE}, {WGSIZE}}; - cgh.parallel_for(ndr, [=](sycl::nd_item<> it) { - size_t id = it.get_local_linear_id(); - - // Each work-item has its own dedicated element of the array. - mem[id] = 0; - }); - }).wait(); -} - -void test_operations() { - sycl::queue q; - - q.submit([&](sycl::handler &cgh) { - syclex::work_group_memory mem1{cgh}; // scalar - syclex::work_group_memory mem2{cgh}; // bounded array - syclex::work_group_memory mem3{5, cgh}; // unbounded array - syclex::work_group_memory mem4{2, - cgh}; // multi-dimensional array - syclex::work_group_memory mem5{cgh}; // array of struct - - sycl::nd_range ndr{{SIZE}, {WGSIZE}}; - cgh.parallel_for(ndr, [=](sycl::nd_item<> it) { - if (it.get_group().leader()) { - // A "work_group_memory" templated on a scalar type acts much like the - // enclosed scalar type. - ++mem1; - mem1++; - mem1 += 1; - mem1 = mem1 + 1; - int *p1 = &mem1; - - // A "work_group_memory" templated on an array type (either bounded or - // unbounded) acts like an array. - ++mem2[4]; - mem2[4]++; - mem2[4] = mem2[4] + 1; - int *p2 = &mem2[4]; - - // A multi-dimensional array works as expected. - mem4[1][5] = mem4[1][5] + 1; - mem4[1][7] = mem4[1][7] + 1; - - // An array of structs works as expected too. 
- mem5[1].x++; - mem5[1].y = mem5[1].y + 1; - } - }); - }).wait(); -} - -int main() { - test_breadth(); - test_basic(); - test_operations(); -} From 52f13f01cea8ffcf32a3a2d71d24d2ab5fe1251a Mon Sep 17 00:00:00 2001 From: "Bushi, Lorenc" Date: Fri, 27 Sep 2024 19:32:16 +0200 Subject: [PATCH 036/107] Move extension doc to proposed --- .../sycl_ext_oneapi_work_group_memory.asciidoc | 0 sycl/source/feature_test.hpp.in | 1 - 2 files changed, 1 deletion(-) rename sycl/doc/extensions/{experimental => proposed}/sycl_ext_oneapi_work_group_memory.asciidoc (100%) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_work_group_memory.asciidoc b/sycl/doc/extensions/proposed/sycl_ext_oneapi_work_group_memory.asciidoc similarity index 100% rename from sycl/doc/extensions/experimental/sycl_ext_oneapi_work_group_memory.asciidoc rename to sycl/doc/extensions/proposed/sycl_ext_oneapi_work_group_memory.asciidoc diff --git a/sycl/source/feature_test.hpp.in b/sycl/source/feature_test.hpp.in index c409a4e10db8a..a61e504eb5e4c 100644 --- a/sycl/source/feature_test.hpp.in +++ b/sycl/source/feature_test.hpp.in @@ -108,7 +108,6 @@ inline namespace _V1 { #define SYCL_EXT_ONEAPI_RAW_KERNEL_ARG 1 #define SYCL_EXT_ONEAPI_PROFILING_TAG 1 #define SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND 1 -#define SYCL_EXT_ONEAPI_WORK_GROUP_MEMORY 1 // In progress yet #define SYCL_EXT_ONEAPI_ATOMIC16 0 From 71d101332bdfdd3a2e1f0e19e5c2a3b2401b459d Mon Sep 17 00:00:00 2001 From: "Bushi, Lorenc" Date: Fri, 27 Sep 2024 20:19:16 +0200 Subject: [PATCH 037/107] Restore proposed status of work group memory doc --- .../sycl_ext_oneapi_work_group_memory.asciidoc | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_work_group_memory.asciidoc b/sycl/doc/extensions/proposed/sycl_ext_oneapi_work_group_memory.asciidoc index a8857dd213168..9a7875c6987ab 100644 --- a/sycl/doc/extensions/proposed/sycl_ext_oneapi_work_group_memory.asciidoc +++ 
b/sycl/doc/extensions/proposed/sycl_ext_oneapi_work_group_memory.asciidoc @@ -49,10 +49,12 @@ This extension also depends on the following other SYCL extensions: == Status -This is an experimental extension specification, intended to provide early -access to features and gather community feedback. Interfaces defined in this -specification are implemented in {dpcpp}, but they are not finalized and may -change incompatibly in future versions of {dpcpp} without prior notice. +This is a proposed extension specification, intended to gather community +feedback. +Interfaces defined in this specification may not be implemented yet or may be +in a preliminary state. +The specification itself may also change in incompatible ways before it is +finalized. *Shipping software products should not rely on APIs defined in this specification.* From d48bc42d740b764fd1e3ff39fd07c03ab0ec9451 Mon Sep 17 00:00:00 2001 From: "Bushi, Lorenc" Date: Fri, 27 Sep 2024 21:27:54 +0200 Subject: [PATCH 038/107] Fix unused variable warning --- .../sycl/ext/oneapi/experimental/work_group_memory.hpp | 4 ++-- .../WorkGroupMemory/{invalid_test.cpp => invalid_usage.cpp} | 0 2 files changed, 2 insertions(+), 2 deletions(-) rename sycl/test-e2e/WorkGroupMemory/{invalid_test.cpp => invalid_usage.cpp} (100%) diff --git a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp index 8644806ea9572..dec901adcd63a 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp @@ -54,12 +54,12 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory work_group_memory &operator=(const work_group_memory &rhs) = default; template >> - work_group_memory(handler &cgh) + work_group_memory(handler &) : sycl::detail::work_group_memory_impl(sizeof(work_group_memory), sizeof(DataT)) {} template >> - work_group_memory(size_t num, handler &cgh) +
work_group_memory(size_t num, handler &) : sycl::detail::work_group_memory_impl( sizeof(work_group_memory), num * sizeof(std::remove_extent_t)) {} diff --git a/sycl/test-e2e/WorkGroupMemory/invalid_test.cpp b/sycl/test-e2e/WorkGroupMemory/invalid_usage.cpp similarity index 100% rename from sycl/test-e2e/WorkGroupMemory/invalid_test.cpp rename to sycl/test-e2e/WorkGroupMemory/invalid_usage.cpp From f6515bc89f05e37ce21e1f891c8fcb08951804a1 Mon Sep 17 00:00:00 2001 From: "Bushi, Lorenc" Date: Fri, 27 Sep 2024 22:03:46 +0200 Subject: [PATCH 039/107] Reduce test size to make sure UR does not run out of resources --- sycl/test-e2e/WorkGroupMemory/swap_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test-e2e/WorkGroupMemory/swap_test.cpp b/sycl/test-e2e/WorkGroupMemory/swap_test.cpp index 080e551e622e2..2d0aae14adab1 100644 --- a/sycl/test-e2e/WorkGroupMemory/swap_test.cpp +++ b/sycl/test-e2e/WorkGroupMemory/swap_test.cpp @@ -307,7 +307,7 @@ void coherency(size_t size, size_t wgsize) { }); } -constexpr size_t N = 128; +constexpr size_t N = 32; int main() { int intarr1[N][N]; int intarr2[N][N]; From 3e4c73c37ec2afaded640fed6492560a873f2bb7 Mon Sep 17 00:00:00 2001 From: "Bushi, Lorenc" Date: Fri, 27 Sep 2024 22:50:53 +0200 Subject: [PATCH 040/107] Replace sycl.hpp with core.hpp in the includes of E2E test --- sycl/test-e2e/WorkGroupMemory/invalid_usage.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sycl/test-e2e/WorkGroupMemory/invalid_usage.cpp b/sycl/test-e2e/WorkGroupMemory/invalid_usage.cpp index 3a1609ac21607..7501fcd7591d5 100644 --- a/sycl/test-e2e/WorkGroupMemory/invalid_usage.cpp +++ b/sycl/test-e2e/WorkGroupMemory/invalid_usage.cpp @@ -1,7 +1,8 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out #include -#include +#include +#include using namespace sycl::ext::oneapi::experimental; // As per the spec, a work_group_memory object cannot be used in a single task From c84229e8e17c86c4e4adcc5bb15d79673d30b06b
Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Sun, 29 Sep 2024 22:15:44 -0400 Subject: [PATCH 041/107] Remove sycl.hpp include from tests --- sycl/test-e2e/WorkGroupMemory/swap_test.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/test-e2e/WorkGroupMemory/swap_test.cpp b/sycl/test-e2e/WorkGroupMemory/swap_test.cpp index 2d0aae14adab1..2c67490d4c48a 100644 --- a/sycl/test-e2e/WorkGroupMemory/swap_test.cpp +++ b/sycl/test-e2e/WorkGroupMemory/swap_test.cpp @@ -5,7 +5,6 @@ #include #include #include -#include namespace syclexp = sycl::ext::oneapi::experimental; sycl::queue q; From d2fddd84e64a2209ab38ab66b9950485cfe4a20f Mon Sep 17 00:00:00 2001 From: "Bushi, Lorenc" Date: Wed, 2 Oct 2024 22:35:50 +0200 Subject: [PATCH 042/107] Add support for unbounded arrays --- clang/lib/CodeGen/CodeGenTypes.cpp | 8 +++ .../oneapi/experimental/work_group_memory.hpp | 17 +++---- sycl/include/sycl/handler.hpp | 5 +- sycl/test-e2e/WorkGroupMemory/swap_test.cpp | 50 +++++++++++++++++++ .../WorkGroupMemory/unbounded_array_test.cpp | 23 --------- 5 files changed, 66 insertions(+), 37 deletions(-) delete mode 100644 sycl/test-e2e/WorkGroupMemory/unbounded_array_test.cpp diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index edf26dd6186ba..cdf80f2d8397c 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -488,6 +488,14 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { return Ty; } } + // An incomplete array AST type is typically lowered to an array of length zero in LLVM IR. + // For SYCL devices, this is incompatible with SPIRV which does not accept arrays of length zero + // so we explicitly intercept this case to instead lower to an array of length 1 instead. + if (Context.getLangOpts().SYCLIsDevice) + if (T->isIncompleteArrayType()) { + return llvm::ArrayType::get(ConvertType(cast(T)->getElementType()), 1); +} +} // RecordTypes are cached and processed specially. 
if (const RecordType *RT = dyn_cast(Ty)) diff --git a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp index dec901adcd63a..dfded4c3b7216 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp @@ -22,19 +22,17 @@ inline constexpr bool is_unbounded_array_v = is_unbounded_array::value; class work_group_memory_impl { public: - work_group_memory_impl() : wgm_size{0}, buffer_size{0} {} + work_group_memory_impl() : buffer_size{0} {} work_group_memory_impl(const work_group_memory_impl &rhs) = default; work_group_memory_impl & operator=(const work_group_memory_impl &rhs) = default; - work_group_memory_impl(size_t wgm_size, size_t buffer_size) - : wgm_size{wgm_size}, buffer_size{buffer_size} {} - size_t wgm_size; + work_group_memory_impl(size_t buffer_size) + : buffer_size{buffer_size} {} + private: size_t buffer_size; + friend class sycl::handler; }; -inline size_t getWorkGroupMemoryOwnSize(detail::work_group_memory_impl *wgm) { - return wgm->wgm_size; -} } // namespace detail namespace ext::oneapi::experimental { @@ -55,13 +53,12 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory template >> work_group_memory(handler &) - : sycl::detail::work_group_memory_impl(sizeof(work_group_memory), + : sycl::detail::work_group_memory_impl( sizeof(DataT)) {} template >> work_group_memory(size_t num, handler &) : sycl::detail::work_group_memory_impl( - sizeof(work_group_memory), num * sizeof(std::remove_extent_t)) {} template multi_ptr @@ -70,7 +67,7 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory IsDecorated, value_type>(ptr); } DataT *operator&() const { return reinterpret_cast(ptr); } - operator DataT &() const { return *reinterpret_cast(ptr); } + operator DataT &() const { return *(this->operator&()); } template >> const work_group_memory 
&operator=(const DataT &value) const { diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 987e4ac6cea3a..70da0dddc225c 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -171,8 +171,6 @@ class graph_impl; } // namespace ext::oneapi::experimental::detail namespace detail { -class work_group_memory_impl; -size_t getWorkGroupMemoryOwnSize(work_group_memory_impl *); class handler_impl; class kernel_impl; class queue_impl; @@ -720,8 +718,7 @@ class __SYCL_EXPORT handler { const ext::oneapi::experimental::work_group_memory &Arg) { addArg(detail::kernel_param_kind_t::kind_work_group_memory, &Arg, - detail::getWorkGroupMemoryOwnSize( - static_cast(&Arg)), + 0, ArgIndex); } diff --git a/sycl/test-e2e/WorkGroupMemory/swap_test.cpp b/sycl/test-e2e/WorkGroupMemory/swap_test.cpp index 2d0aae14adab1..1db06559e7639 100644 --- a/sycl/test-e2e/WorkGroupMemory/swap_test.cpp +++ b/sycl/test-e2e/WorkGroupMemory/swap_test.cpp @@ -186,6 +186,29 @@ void swap_array_1d(T (&a)[N], T (&b)[N], size_t batch_size) { for (int i = 0; i < N; ++i) { assert(a[i] == old_b[i] && b[i] == old_a[i] && "Incorrect swap!"); } + + // Same as above but use an unbounded array as temporary storage + { + sycl::buffer buf_a{a, N}; + sycl::buffer buf_b{b, N}; + q.submit([&](sycl::handler &cgh) { + sycl::accessor acc_a{buf_a, cgh}; + sycl::accessor acc_b{buf_b, cgh}; + syclexp::work_group_memory temp{N, cgh}; + sycl::nd_range<1> ndr{size, wgsize}; + cgh.parallel_for(ndr, [=](sycl::nd_item<> it) { + const auto i = it.get_global_id(); + auto ptr = &temp; + (*ptr)[i] = acc_a[i]; + acc_a[i] = acc_b[i]; + acc_b[i] = (*ptr)[i]; + }); + }); + } + // Four swaps same as no swap + for (int i = 0; i < N; ++i) { + assert(a[i] == old_a[i] && b[i] == old_b[i] && "Incorrect swap!"); + } } template @@ -283,6 +306,33 @@ void swap_array_2d(T (&a)[N][N], T (&b)[N][N], size_t batch_size) { "Incorrect swap!"); } } + + // Same as above but use an unbounded array as temporary 
storage +{ + sycl::buffer buf_a{a[0], sycl::range{N, N}}; + sycl::buffer buf_b{b[0], sycl::range{N, N}}; + q.submit([&](sycl::handler &cgh) { + sycl::accessor acc_a{buf_a, cgh}; + sycl::accessor acc_b{buf_b, cgh}; + syclexp::work_group_memory temp{N, cgh}; + sycl::nd_range<2> ndr{size, wgsize}; + cgh.parallel_for(ndr, [=](sycl::nd_item<2> it) { + const auto i = it.get_global_id()[0]; + const auto j = it.get_global_id()[1]; + temp[i][j] = acc_a[i][j]; + acc_a[i][j] = acc_b[i][j]; + syclexp::work_group_memory temp2{temp}; + acc_b[i][j] = temp2[i][j]; + }); + }); + } + for (int i = 0; i < N; ++i) { + for (int j = 0; j < N; ++j) { + // Four swaps are the same as no swap + assert(a[i][j] == old_a[i][j] && b[i][j] == old_b[i][j] && + "Incorrect swap!"); + } + } } // Coherency test that checks that work group memory is truly shared by diff --git a/sycl/test-e2e/WorkGroupMemory/unbounded_array_test.cpp b/sycl/test-e2e/WorkGroupMemory/unbounded_array_test.cpp deleted file mode 100644 index d280f82c18e43..0000000000000 --- a/sycl/test-e2e/WorkGroupMemory/unbounded_array_test.cpp +++ /dev/null @@ -1,23 +0,0 @@ -// RUN: %{build} -o %t.out -// RUN: %{run} %t.out - -#include -#include - -// This test checks that work group memory objects can be used with unbounded -// arrays as template arguments. - -// Unbounded array support is not yet implemented for work group memory -// due to a LLVM IR <-> SPIRV translation problem. 
-// XFAIL: * - -int main() { - sycl::queue q; - q.submit([&](sycl::handler &cgh) { - sycl::ext::oneapi::experimental::work_group_memory data{16, cgh}; - cgh.parallel_for(sycl::nd_range<1>{1, 1}, [=](sycl::nd_item<1> it) { - for (int i = 0; i < 16; ++i) - data[i] = 42; - }); - }); -} From 0f677c297e0237eefe6254953b8be16304f402bf Mon Sep 17 00:00:00 2001 From: "Bushi, Lorenc" Date: Wed, 2 Oct 2024 22:38:22 +0200 Subject: [PATCH 043/107] Fix compilation errors --- clang/lib/CodeGen/CodeGenTypes.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index cdf80f2d8397c..e8ec8b97ed0eb 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -488,14 +488,14 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { return Ty; } } - // An incomplete array AST type is typically lowered to an array of length zero in LLVM IR. - // For SYCL devices, this is incompatible with SPIRV which does not accept arrays of length zero - // so we explicitly intercept this case to instead lower to an array of length 1 instead. - if (Context.getLangOpts().SYCLIsDevice) - if (T->isIncompleteArrayType()) { - return llvm::ArrayType::get(ConvertType(cast(T)->getElementType()), 1); -} -} + // An incomplete array AST type is typically lowered to an array of length + // zero in LLVM IR. For SYCL devices, this is incompatible with SPIRV which + // does not accept arrays of length zero so we explicitly intercept this case + // to instead lower to an array of length 1 instead. + if (Context.getLangOpts().SYCLIsDevice) + if (T->isIncompleteArrayType()) + return llvm::ArrayType::get( + ConvertType(cast(T)->getElementType()), 1); // RecordTypes are cached and processed specially. 
if (const RecordType *RT = dyn_cast(Ty)) From 6ef823ed2185fdb3f15ab4f9ddc7ac75bfdf30ff Mon Sep 17 00:00:00 2001 From: "Bushi, Lorenc" Date: Wed, 2 Oct 2024 22:41:37 +0200 Subject: [PATCH 044/107] Improve swap test --- sycl/test-e2e/WorkGroupMemory/swap_test.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/test-e2e/WorkGroupMemory/swap_test.cpp b/sycl/test-e2e/WorkGroupMemory/swap_test.cpp index 1db06559e7639..e82e986f7dc62 100644 --- a/sycl/test-e2e/WorkGroupMemory/swap_test.cpp +++ b/sycl/test-e2e/WorkGroupMemory/swap_test.cpp @@ -306,9 +306,9 @@ void swap_array_2d(T (&a)[N][N], T (&b)[N][N], size_t batch_size) { "Incorrect swap!"); } } - + // Same as above but use an unbounded array as temporary storage -{ + { sycl::buffer buf_a{a[0], sycl::range{N, N}}; sycl::buffer buf_b{b[0], sycl::range{N, N}}; q.submit([&](sycl::handler &cgh) { @@ -325,7 +325,7 @@ void swap_array_2d(T (&a)[N][N], T (&b)[N][N], size_t batch_size) { acc_b[i][j] = temp2[i][j]; }); }); - } + } for (int i = 0; i < N; ++i) { for (int j = 0; j < N; ++j) { // Four swaps are the same as no swap From 4de6d5034e5b9c08898487621f77f568fcd284d8 Mon Sep 17 00:00:00 2001 From: "Bushi, Lorenc" Date: Thu, 3 Oct 2024 06:17:52 +0200 Subject: [PATCH 045/107] Refactor CodeGenTypes.cpp changes --- clang/lib/CodeGen/CodeGenTypes.cpp | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index e8ec8b97ed0eb..85366762792c3 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -488,15 +488,6 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { return Ty; } } - // An incomplete array AST type is typically lowered to an array of length - // zero in LLVM IR. For SYCL devices, this is incompatible with SPIRV which - // does not accept arrays of length zero so we explicitly intercept this case - // to instead lower to an array of length 1 instead. 
- if (Context.getLangOpts().SYCLIsDevice) - if (T->isIncompleteArrayType()) - return llvm::ArrayType::get( - ConvertType(cast(T)->getElementType()), 1); - // RecordTypes are cached and processed specially. if (const RecordType *RT = dyn_cast(Ty)) return ConvertRecordDeclType(RT->getDecl()); @@ -816,7 +807,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { SkippedLayout = true; ResultType = llvm::Type::getInt8Ty(getLLVMContext()); } - ResultType = llvm::ArrayType::get(ResultType, 0); + ResultType = llvm::ArrayType::get(ResultType, 1); break; } case Type::ArrayParameter: From 5653f045bbd6550327b6d8e4d927977d841944d8 Mon Sep 17 00:00:00 2001 From: "Bushi, Lorenc" Date: Thu, 3 Oct 2024 06:18:40 +0200 Subject: [PATCH 046/107] Refactor CodeGenTypes.cpp changes --- clang/lib/CodeGen/CodeGenTypes.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index 85366762792c3..bdb917cd8f405 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -488,6 +488,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { return Ty; } } + // RecordTypes are cached and processed specially. 
if (const RecordType *RT = dyn_cast(Ty)) return ConvertRecordDeclType(RT->getDecl()); From 40eb63ef7cdaa386bd17bbd645deb637d0b46301 Mon Sep 17 00:00:00 2001 From: "Bushi, Lorenc" Date: Thu, 3 Oct 2024 18:15:31 +0200 Subject: [PATCH 047/107] translate unbounded arrays to 1-sized arrays in LLVM IR in device compilation --- clang/lib/CodeGen/CodeGenTypes.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index bdb917cd8f405..4fcb7ebe6d80c 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -807,8 +807,8 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { if (!ResultType->isSized()) { SkippedLayout = true; ResultType = llvm::Type::getInt8Ty(getLLVMContext()); - } - ResultType = llvm::ArrayType::get(ResultType, 1); + } + ResultType = llvm::ArrayType::get(ResultType, Context.getLangOpts().SYCLIsDevice ? 1 : 0); break; } case Type::ArrayParameter: From f6a0df72d09a6b5980313da9ae837ae461328d78 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Thu, 3 Oct 2024 12:16:14 -0400 Subject: [PATCH 048/107] Remove trailing spaces --- clang/lib/CodeGen/CodeGenTypes.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index 4fcb7ebe6d80c..b452e4062a1bb 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -807,7 +807,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { if (!ResultType->isSized()) { SkippedLayout = true; ResultType = llvm::Type::getInt8Ty(getLLVMContext()); - } + } ResultType = llvm::ArrayType::get(ResultType, Context.getLangOpts().SYCLIsDevice ? 
1 : 0); break; } From 026501c7fe5d4cf3659bc3108588b151345d9dd2 Mon Sep 17 00:00:00 2001 From: "Bushi, Lorenc" Date: Thu, 3 Oct 2024 19:04:54 +0200 Subject: [PATCH 049/107] Add unbounded array support by modifying LLVM IR -> SPIRV type lowering --- clang/lib/CodeGen/CodeGenTypes.cpp | 5 +++-- llvm-spirv/lib/SPIRV/SPIRVWriter.cpp | 11 +++-------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index 4fcb7ebe6d80c..943dd29834a10 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -807,8 +807,9 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { if (!ResultType->isSized()) { SkippedLayout = true; ResultType = llvm::Type::getInt8Ty(getLLVMContext()); - } - ResultType = llvm::ArrayType::get(ResultType, Context.getLangOpts().SYCLIsDevice ? 1 : 0); + } + ResultType = llvm::ArrayType::get( + ResultType, 0); break; } case Type::ArrayParameter: diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp index 775296146c528..ddd424f55e2fd 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp @@ -453,24 +453,19 @@ SPIRVType *LLVMToSPIRVBase::transType(Type *T) { if (T->isArrayTy()) { // SPIR-V 1.3 s3.32.6: Length is the number of elements in the array. // It must be at least 1. - if (T->getArrayNumElements() < 1) { - std::string Str; - llvm::raw_string_ostream OS(Str); - OS << *T; - SPIRVCK(T->getArrayNumElements() >= 1, InvalidArraySize, OS.str()); - } + const auto ArraySize = T->getArrayNumElements() ? 
T->getArrayNumElements() : 1; Type *ElTy = T->getArrayElementType(); SPIRVType *TransType = BM->addArrayType( transType(ElTy), static_cast(transValue( - ConstantInt::get(getSizetType(), T->getArrayNumElements(), false), + ConstantInt::get(getSizetType(), ArraySize, false), nullptr))); mapType(T, TransType); if (ElTy->isPointerTy()) { mapType( ArrayType::get(TypedPointerType::get(Type::getInt8Ty(*Ctx), ElTy->getPointerAddressSpace()), - T->getArrayNumElements()), + ArraySize), TransType); } return TransType; From 2ce21b35865413f7e3d07f87f370c6404d28e168 Mon Sep 17 00:00:00 2001 From: "Bushi, Lorenc" Date: Thu, 3 Oct 2024 19:05:45 +0200 Subject: [PATCH 050/107] Revert CodeGenTypes.cpp changes --- clang/lib/CodeGen/CodeGenTypes.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index 943dd29834a10..edf26dd6186ba 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -808,8 +808,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { SkippedLayout = true; ResultType = llvm::Type::getInt8Ty(getLLVMContext()); } - ResultType = llvm::ArrayType::get( - ResultType, 0); + ResultType = llvm::ArrayType::get(ResultType, 0); break; } case Type::ArrayParameter: From d73b0b19fcb05a3eb2b414f8e8b586bc4ee312b7 Mon Sep 17 00:00:00 2001 From: "Bushi, Lorenc" Date: Mon, 7 Oct 2024 19:35:52 +0200 Subject: [PATCH 051/107] Update SPIRVWriter.cpp --- llvm-spirv/lib/SPIRV/SPIRVWriter.cpp | 6 +++--- sycl/include/sycl/handler.hpp | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp index ddd424f55e2fd..e34a50dbd74a0 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp @@ -453,13 +453,13 @@ SPIRVType *LLVMToSPIRVBase::transType(Type *T) { if (T->isArrayTy()) { // SPIR-V 1.3 s3.32.6: Length is the number of elements in the array. 
// It must be at least 1. - const auto ArraySize = T->getArrayNumElements() ? T->getArrayNumElements() : 1; + const auto ArraySize = + T->getArrayNumElements() ? T->getArrayNumElements() : 1; Type *ElTy = T->getArrayElementType(); SPIRVType *TransType = BM->addArrayType( transType(ElTy), static_cast(transValue( - ConstantInt::get(getSizetType(), ArraySize, false), - nullptr))); + ConstantInt::get(getSizetType(), ArraySize, false), nullptr))); mapType(T, TransType); if (ElTy->isPointerTy()) { mapType( diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 70da0dddc225c..5b20d55816082 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -717,8 +717,7 @@ class __SYCL_EXPORT handler { int ArgIndex, const ext::oneapi::experimental::work_group_memory &Arg) { - addArg(detail::kernel_param_kind_t::kind_work_group_memory, &Arg, - 0, + addArg(detail::kernel_param_kind_t::kind_work_group_memory, &Arg, 0, ArgIndex); } From 31481b8197480970d9bd271888ac918ee083e37b Mon Sep 17 00:00:00 2001 From: "Bushi, Lorenc" Date: Thu, 10 Oct 2024 21:00:13 +0200 Subject: [PATCH 052/107] Revert SPIRV translator changes --- clang/lib/Sema/SemaSYCL.cpp | 7007 ----------------- llvm-spirv/lib/SPIRV/SPIRVWriter.cpp | 130 +- .../oneapi/experimental/work_group_memory.hpp | 85 - 3 files changed, 45 insertions(+), 7177 deletions(-) delete mode 100644 clang/lib/Sema/SemaSYCL.cpp delete mode 100644 sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp deleted file mode 100644 index d8925669b1ae3..0000000000000 --- a/clang/lib/Sema/SemaSYCL.cpp +++ /dev/null @@ -1,7007 +0,0 @@ -//===- SemaSYCL.cpp - Semantic Analysis for SYCL constructs ---------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// This implements Semantic Analysis for SYCL constructs. -//===----------------------------------------------------------------------===// - -#include "clang/Sema/SemaSYCL.h" -#include "TreeTransform.h" -#include "clang/AST/AST.h" -#include "clang/AST/Mangle.h" -#include "clang/AST/QualTypeNames.h" -#include "clang/AST/RecordLayout.h" -#include "clang/AST/RecursiveASTVisitor.h" -#include "clang/AST/TemplateArgumentVisitor.h" -#include "clang/AST/TypeOrdering.h" -#include "clang/AST/TypeVisitor.h" -#include "clang/Analysis/CallGraph.h" -#include "clang/Basic/Attributes.h" -#include "clang/Basic/Builtins.h" -#include "clang/Basic/Diagnostic.h" -#include "clang/Basic/TargetInfo.h" -#include "clang/Basic/Version.h" -#include "clang/Sema/Attr.h" -#include "clang/Sema/Initialization.h" -#include "clang/Sema/ParsedAttr.h" -#include "clang/Sema/Sema.h" -#include "llvm/ADT/APSInt.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/raw_ostream.h" - -#include -#include -#include - -using namespace clang; -using namespace std::placeholders; - -using KernelParamKind = SYCLIntegrationHeader::kernel_param_kind_t; - -enum target { - global_buffer = 2014, - constant_buffer, - local, - image, - host_buffer, - host_image, - image_array -}; - -using ParamDesc = std::tuple; - -enum KernelInvocationKind { - InvokeUnknown, - InvokeSingleTask, - InvokeParallelFor, - InvokeParallelForWorkGroup -}; - -static constexpr llvm::StringLiteral InitMethodName = "__init"; -static constexpr llvm::StringLiteral InitESIMDMethodName = "__init_esimd"; -static constexpr llvm::StringLiteral InitSpecConstantsBuffer = - "__init_specialization_constants_buffer"; -static constexpr llvm::StringLiteral 
FinalizeMethodName = "__finalize"; -static constexpr llvm::StringLiteral LibstdcxxFailedAssertion = - "__failed_assertion"; -constexpr unsigned MaxKernelArgsSize = 2048; - -bool SemaSYCL::isSyclType(QualType Ty, SYCLTypeAttr::SYCLType TypeName) { - const auto *RD = Ty->getAsCXXRecordDecl(); - if (!RD) - return false; - - if (const auto *Attr = RD->getAttr()) - return Attr->getType() == TypeName; - - if (const auto *CTSD = dyn_cast(RD)) - if (CXXRecordDecl *TemplateDecl = - CTSD->getSpecializedTemplate()->getTemplatedDecl()) - if (const auto *Attr = TemplateDecl->getAttr()) - return Attr->getType() == TypeName; - - return false; -} - -static bool isSyclAccessorType(QualType Ty) { - return SemaSYCL::isSyclType(Ty, SYCLTypeAttr::accessor) || - SemaSYCL::isSyclType(Ty, SYCLTypeAttr::local_accessor); -} - -// FIXME: Accessor property lists should be modified to use compile-time -// properties. Once implemented, this function (and possibly all/most code -// in SemaSYCL.cpp handling no_alias and buffer_location property) can be -// removed. 
-static bool isAccessorPropertyType(QualType Ty, - SYCLTypeAttr::SYCLType TypeName) { - if (const auto *RD = Ty->getAsCXXRecordDecl()) - if (const auto *Parent = dyn_cast(RD->getParent())) - if (const auto *Attr = Parent->getAttr()) - return Attr->getType() == TypeName; - - return false; -} - -static bool isSyclSpecialType(QualType Ty, SemaSYCL &S) { - return S.isTypeDecoratedWithDeclAttribute(Ty); -} - -ExprResult SemaSYCL::ActOnSYCLBuiltinNumFieldsExpr(ParsedType PT) { - TypeSourceInfo *TInfo = nullptr; - QualType QT = Sema::GetTypeFromParser(PT, &TInfo); - assert(TInfo && "couldn't get type info from a type from the parser?"); - SourceLocation TypeLoc = TInfo->getTypeLoc().getBeginLoc(); - - return BuildSYCLBuiltinNumFieldsExpr(TypeLoc, QT); -} - -ExprResult SemaSYCL::BuildSYCLBuiltinNumFieldsExpr(SourceLocation Loc, - QualType SourceTy) { - if (!SourceTy->isDependentType()) { - if (SemaRef.RequireCompleteType( - Loc, SourceTy, diag::err_sycl_type_trait_requires_complete_type, - /*__builtin_num_fields*/ 0)) - return ExprError(); - - if (!SourceTy->isRecordType()) { - Diag(Loc, diag::err_sycl_type_trait_requires_record_type) - << /*__builtin_num_fields*/ 0; - return ExprError(); - } - } - return new (getASTContext()) - SYCLBuiltinNumFieldsExpr(Loc, SourceTy, getASTContext().getSizeType()); -} - -ExprResult SemaSYCL::ActOnSYCLBuiltinFieldTypeExpr(ParsedType PT, Expr *Idx) { - TypeSourceInfo *TInfo = nullptr; - QualType QT = Sema::GetTypeFromParser(PT, &TInfo); - assert(TInfo && "couldn't get type info from a type from the parser?"); - SourceLocation TypeLoc = TInfo->getTypeLoc().getBeginLoc(); - - return BuildSYCLBuiltinFieldTypeExpr(TypeLoc, QT, Idx); -} - -ExprResult SemaSYCL::BuildSYCLBuiltinFieldTypeExpr(SourceLocation Loc, - QualType SourceTy, - Expr *Idx) { - // If the expression appears in an evaluated context, we want to give an - // error so that users don't attempt to use the value of this expression. 
- if (!SemaRef.isUnevaluatedContext()) { - Diag(Loc, diag::err_sycl_builtin_type_trait_evaluated) - << /*__builtin_field_type*/ 0; - return ExprError(); - } - - // We may not be able to calculate the field type (the source type may be a - // dependent type), so use the source type as a basic fallback. This will - // ensure that the AST node will have a dependent type that gets resolved - // later to the real type. - QualType FieldTy = SourceTy; - ExprValueKind ValueKind = VK_PRValue; - if (!SourceTy->isDependentType()) { - if (SemaRef.RequireCompleteType( - Loc, SourceTy, diag::err_sycl_type_trait_requires_complete_type, - /*__builtin_field_type*/ 1)) - return ExprError(); - - if (!SourceTy->isRecordType()) { - Diag(Loc, diag::err_sycl_type_trait_requires_record_type) - << /*__builtin_field_type*/ 1; - return ExprError(); - } - - if (!Idx->isValueDependent()) { - std::optional IdxVal = - Idx->getIntegerConstantExpr(getASTContext()); - if (IdxVal) { - RecordDecl *RD = SourceTy->getAsRecordDecl(); - assert(RD && "Record type but no record decl?"); - int64_t Index = IdxVal->getExtValue(); - - if (Index < 0) { - Diag(Idx->getExprLoc(), - diag::err_sycl_type_trait_requires_nonnegative_index) - << /*fields*/ 0; - return ExprError(); - } - - // Ensure that the index is within range. - int64_t NumFields = std::distance(RD->field_begin(), RD->field_end()); - if (Index >= NumFields) { - Diag(Idx->getExprLoc(), - diag::err_sycl_builtin_type_trait_index_out_of_range) - << toString(*IdxVal, 10) << SourceTy << /*fields*/ 0; - return ExprError(); - } - const FieldDecl *FD = *std::next(RD->field_begin(), Index); - FieldTy = FD->getType(); - - // If the field type was a reference type, adjust it now. 
- if (FieldTy->isLValueReferenceType()) { - ValueKind = VK_LValue; - FieldTy = FieldTy.getNonReferenceType(); - } else if (FieldTy->isRValueReferenceType()) { - ValueKind = VK_XValue; - FieldTy = FieldTy.getNonReferenceType(); - } - } - } - } - return new (getASTContext()) - SYCLBuiltinFieldTypeExpr(Loc, SourceTy, Idx, FieldTy, ValueKind); -} - -ExprResult SemaSYCL::ActOnSYCLBuiltinNumBasesExpr(ParsedType PT) { - TypeSourceInfo *TInfo = nullptr; - QualType QT = Sema::GetTypeFromParser(PT, &TInfo); - assert(TInfo && "couldn't get type info from a type from the parser?"); - SourceLocation TypeLoc = TInfo->getTypeLoc().getBeginLoc(); - - return BuildSYCLBuiltinNumBasesExpr(TypeLoc, QT); -} - -ExprResult SemaSYCL::BuildSYCLBuiltinNumBasesExpr(SourceLocation Loc, - QualType SourceTy) { - if (!SourceTy->isDependentType()) { - if (SemaRef.RequireCompleteType( - Loc, SourceTy, diag::err_sycl_type_trait_requires_complete_type, - /*__builtin_num_bases*/ 2)) - return ExprError(); - - if (!SourceTy->isRecordType()) { - Diag(Loc, diag::err_sycl_type_trait_requires_record_type) - << /*__builtin_num_bases*/ 2; - return ExprError(); - } - } - return new (getASTContext()) - SYCLBuiltinNumBasesExpr(Loc, SourceTy, getASTContext().getSizeType()); -} - -ExprResult SemaSYCL::ActOnSYCLBuiltinBaseTypeExpr(ParsedType PT, Expr *Idx) { - TypeSourceInfo *TInfo = nullptr; - QualType QT = SemaRef.GetTypeFromParser(PT, &TInfo); - assert(TInfo && "couldn't get type info from a type from the parser?"); - SourceLocation TypeLoc = TInfo->getTypeLoc().getBeginLoc(); - - return BuildSYCLBuiltinBaseTypeExpr(TypeLoc, QT, Idx); -} - -ExprResult SemaSYCL::BuildSYCLBuiltinBaseTypeExpr(SourceLocation Loc, - QualType SourceTy, - Expr *Idx) { - // If the expression appears in an evaluated context, we want to give an - // error so that users don't attempt to use the value of this expression. 
- if (!SemaRef.isUnevaluatedContext()) { - Diag(Loc, diag::err_sycl_builtin_type_trait_evaluated) - << /*__builtin_base_type*/ 1; - return ExprError(); - } - - // We may not be able to calculate the base type (the source type may be a - // dependent type), so use the source type as a basic fallback. This will - // ensure that the AST node will have a dependent type that gets resolved - // later to the real type. - QualType BaseTy = SourceTy; - if (!SourceTy->isDependentType()) { - if (SemaRef.RequireCompleteType( - Loc, SourceTy, diag::err_sycl_type_trait_requires_complete_type, - /*__builtin_base_type*/ 3)) - return ExprError(); - - if (!SourceTy->isRecordType()) { - Diag(Loc, diag::err_sycl_type_trait_requires_record_type) - << /*__builtin_base_type*/ 3; - return ExprError(); - } - - if (!Idx->isValueDependent()) { - std::optional IdxVal = - Idx->getIntegerConstantExpr(getASTContext()); - if (IdxVal) { - CXXRecordDecl *RD = SourceTy->getAsCXXRecordDecl(); - assert(RD && "Record type but no record decl?"); - int64_t Index = IdxVal->getExtValue(); - - if (Index < 0) { - Diag(Idx->getExprLoc(), - diag::err_sycl_type_trait_requires_nonnegative_index) - << /*bases*/ 1; - return ExprError(); - } - - // Ensure that the index is within range. - if (Index >= RD->getNumBases()) { - Diag(Idx->getExprLoc(), - diag::err_sycl_builtin_type_trait_index_out_of_range) - << toString(*IdxVal, 10) << SourceTy << /*bases*/ 1; - return ExprError(); - } - - const CXXBaseSpecifier &Spec = *std::next(RD->bases_begin(), Index); - BaseTy = Spec.getType(); - } - } - } - return new (getASTContext()) - SYCLBuiltinBaseTypeExpr(Loc, SourceTy, Idx, BaseTy); -} - -/// Returns true if the target requires a new type. 
-/// This happens if a pointer to generic cannot be passed -static bool targetRequiresNewType(ASTContext &Context) { - llvm::Triple T = Context.getTargetInfo().getTriple(); - return !T.isNVPTX(); -} - -// This information is from Section 4.13 of the SYCL spec -// https://www.khronos.org/registry/SYCL/specs/sycl-1.2.1.pdf -// This function returns false if the math lib function -// corresponding to the input builtin is not supported -// for SYCL -static bool IsSyclMathFunc(unsigned BuiltinID) { - switch (BuiltinID) { - case Builtin::BIlround: - case Builtin::BI__builtin_lround: - case Builtin::BIceill: - case Builtin::BI__builtin_ceill: - case Builtin::BIcopysignl: - case Builtin::BI__builtin_copysignl: - case Builtin::BIcosl: - case Builtin::BI__builtin_cosl: - case Builtin::BIexpl: - case Builtin::BI__builtin_expl: - case Builtin::BIexp2l: - case Builtin::BI__builtin_exp2l: - case Builtin::BIfabsl: - case Builtin::BI__builtin_fabsl: - case Builtin::BIfloorl: - case Builtin::BI__builtin_floorl: - case Builtin::BIfmal: - case Builtin::BI__builtin_fmal: - case Builtin::BIfmaxl: - case Builtin::BI__builtin_fmaxl: - case Builtin::BIfminl: - case Builtin::BI__builtin_fminl: - case Builtin::BIfmodl: - case Builtin::BI__builtin_fmodl: - case Builtin::BIlogl: - case Builtin::BI__builtin_logl: - case Builtin::BIlog10l: - case Builtin::BI__builtin_log10l: - case Builtin::BIlog2l: - case Builtin::BI__builtin_log2l: - case Builtin::BIpowl: - case Builtin::BI__builtin_powl: - case Builtin::BIrintl: - case Builtin::BI__builtin_rintl: - case Builtin::BIroundl: - case Builtin::BI__builtin_roundl: - case Builtin::BIsinl: - case Builtin::BI__builtin_sinl: - case Builtin::BIsqrtl: - case Builtin::BI__builtin_sqrtl: - case Builtin::BItruncl: - case Builtin::BI__builtin_truncl: - case Builtin::BIlroundl: - case Builtin::BI__builtin_lroundl: - case Builtin::BIlroundf: - case Builtin::BI__builtin_lroundf: - return false; - default: - break; - } - return true; -} - -bool 
SemaSYCL::isDeclAllowedInSYCLDeviceCode(const Decl *D) { - if (const FunctionDecl *FD = dyn_cast(D)) { - const IdentifierInfo *II = FD->getIdentifier(); - - // Allow __builtin_assume_aligned and __builtin_printf to be called from - // within device code. - if (FD->getBuiltinID() && - (FD->getBuiltinID() == Builtin::BI__builtin_assume_aligned || - FD->getBuiltinID() == Builtin::BI__builtin_printf)) - return true; - - const DeclContext *DC = FD->getDeclContext(); - if (II && II->isStr("__spirv_ocl_printf") && - !FD->isDefined() && - FD->getLanguageLinkage() == CXXLanguageLinkage && - DC->getEnclosingNamespaceContext()->isTranslationUnit()) - return true; - } - return false; -} - -SemaSYCL::SemaSYCL(Sema &S) - : SemaBase(S), SyclIntHeader(nullptr), SyclIntFooter(nullptr) {} - -static bool isZeroSizedArray(SemaSYCL &S, QualType Ty) { - if (const auto *CAT = S.getASTContext().getAsConstantArrayType(Ty)) - return CAT->isZeroSize(); - return false; -} - -static void checkSYCLType(SemaSYCL &S, QualType Ty, SourceRange Loc, - llvm::DenseSet Visited, - SourceRange UsedAtLoc = SourceRange()) { - // Not all variable types are supported inside SYCL kernels, - // for example the quad type __float128 will cause errors in the - // SPIR-V translation phase. - // Here we check any potentially unsupported declaration and issue - // a deferred diagnostic, which will be emitted iff the declaration - // is discovered to reside in kernel code. - // The optional UsedAtLoc param is used when the SYCL usage is at a - // different location than the variable declaration and we need to - // inform the user of both, e.g. struct member usage vs declaration. 
- - bool Emitting = false; - - //--- check types --- - - // zero length arrays - if (isZeroSizedArray(S, Ty)) { - S.DiagIfDeviceCode(Loc.getBegin(), diag::err_typecheck_zero_array_size) - << 1; - Emitting = true; - } - - // variable length arrays - if (Ty->isVariableArrayType()) { - S.DiagIfDeviceCode(Loc.getBegin(), diag::err_vla_unsupported) << 0; - Emitting = true; - } - - // Sub-reference array or pointer, then proceed with that type. - while (Ty->isAnyPointerType() || Ty->isArrayType()) - Ty = QualType{Ty->getPointeeOrArrayElementType(), 0}; - - // __int128, __int128_t, __uint128_t, long double, __float128 - if (Ty->isSpecificBuiltinType(BuiltinType::Int128) || - Ty->isSpecificBuiltinType(BuiltinType::UInt128) || - Ty->isSpecificBuiltinType(BuiltinType::LongDouble) || - Ty->isSpecificBuiltinType(BuiltinType::BFloat16) || - (Ty->isSpecificBuiltinType(BuiltinType::Float128) && - !S.getASTContext().getTargetInfo().hasFloat128Type())) { - S.DiagIfDeviceCode(Loc.getBegin(), diag::err_type_unsupported) - << Ty.getUnqualifiedType().getCanonicalType(); - Emitting = true; - } - - if (Emitting && UsedAtLoc.isValid()) - S.DiagIfDeviceCode(UsedAtLoc.getBegin(), diag::note_used_here); - - //--- now recurse --- - // Pointers complicate recursion. Add this type to Visited. - // If already there, bail out. 
- if (!Visited.insert(Ty).second) - return; - - if (const auto *ATy = dyn_cast(Ty)) - return checkSYCLType(S, ATy->getModifiedType(), Loc, Visited); - - if (const auto *RD = Ty->getAsRecordDecl()) { - for (const auto &Field : RD->fields()) - checkSYCLType(S, Field->getType(), Field->getSourceRange(), Visited, Loc); - } else if (const auto *FPTy = dyn_cast(Ty)) { - for (const auto &ParamTy : FPTy->param_types()) - checkSYCLType(S, ParamTy, Loc, Visited); - checkSYCLType(S, FPTy->getReturnType(), Loc, Visited); - } -} - -void SemaSYCL::checkSYCLDeviceVarDecl(VarDecl *Var) { - assert(getLangOpts().SYCLIsDevice && - "Should only be called during SYCL compilation"); - QualType Ty = Var->getType(); - SourceRange Loc = Var->getLocation(); - llvm::DenseSet Visited; - - checkSYCLType(*this, Ty, Loc, Visited); -} - -enum NotForwardDeclarableReason { - UnscopedEnum, - StdNamespace, - UnnamedTag, - NotAtNamespaceScope, - None -}; - -// This is a helper function which is used to check if a class declaration is: -// * declared within namespace 'std' (at any level) -// e.g., namespace std { namespace literals { class Whatever; } } -// h.single_task([]() {}); -// * declared within a function -// e.g., void foo() { struct S { int i; }; -// h.single_task([]() {}); } -// * declared within another tag -// e.g., struct S { struct T { int i } t; }; -// h.single_task([]() {}); -// User for kernel name types and class/struct types used in free function -// kernel arguments. 
-static NotForwardDeclarableReason -isForwardDeclarable(const NamedDecl *DeclToCheck, SemaSYCL &S, - bool DiagForFreeFunction = false) { - if (const auto *ED = dyn_cast(DeclToCheck); - ED && !ED->isScoped() && !ED->isFixed()) - return NotForwardDeclarableReason::UnscopedEnum; - - const DeclContext *DeclCtx = DeclToCheck->getDeclContext(); - if (DeclCtx) { - while (!DeclCtx->isTranslationUnit() && - (isa(DeclCtx) || isa(DeclCtx))) { - const auto *NSDecl = dyn_cast(DeclCtx); - // We don't report free function kernel parameter case because the - // restriction for the type used there to be forward declarable comes from - // the need to forward declare it in the integration header. We're safe - // to do so because the integration header is an implemention detail and - // is generated by the compiler. - // We do diagnose case with kernel name type since the spec requires us to - // do so. - if (!DiagForFreeFunction && NSDecl && NSDecl->isStdNamespace()) - return NotForwardDeclarableReason::StdNamespace; - DeclCtx = DeclCtx->getParent(); - } - } - - // Check if the we've met a Tag declaration local to a non-namespace scope - // (i.e. Inside a function or within another Tag etc). - if (const auto *Tag = dyn_cast(DeclToCheck)) { - if (Tag->getIdentifier() == nullptr) - return NotForwardDeclarableReason::UnnamedTag; - if (!DeclCtx->isTranslationUnit()) { - // Diagnose used types without complete definition i.e. - // int main() { - // class KernelName1; - // parallel_for(..); - // } - // For kernel name type This case can only be diagnosed during host - // compilation because the integration header is required to distinguish - // between the invalid code (above) and the following valid code: - // int main() { - // parallel_for(..); - // } - // The device compiler forward declares both KernelName1 and - // KernelName2 in the integration header as ::KernelName1 and - // ::KernelName2. 
The problem with the former case is the additional - // declaration 'class KernelName1' in non-global scope. Lookup in this - // case will resolve to ::main::KernelName1 (instead of - // ::KernelName1). Since this is not visible to runtime code that - // submits kernels, this is invalid. - if (Tag->isCompleteDefinition() || - S.getLangOpts().SYCLEnableIntHeaderDiags || DiagForFreeFunction) - return NotForwardDeclarableReason::NotAtNamespaceScope; - } - } - - return NotForwardDeclarableReason::None; -} - -// Tests whether given function is a lambda function or '()' operator used as -// SYCL kernel body function (e.g. in parallel_for). -// NOTE: This is incomplete implemenation. See TODO in the FE TODO list for the -// ESIMD extension. -static bool isSYCLKernelBodyFunction(FunctionDecl *FD) { - return FD->getOverloadedOperator() == OO_Call; -} - -static bool isSYCLUndefinedAllowed(const FunctionDecl *Callee, - const SourceManager &SrcMgr) { - if (!Callee) - return false; - - // The check below requires declaration name, make sure we have it. - if (!Callee->getIdentifier()) - return false; - - // libstdc++-11 introduced an undefined function "void __failed_assertion()" - // which may lead to SemaSYCL check failure. However, this undefined function - // is used to trigger some compilation error when the check fails at compile - // time and will be ignored when the check succeeds. We allow calls to this - // function to support some important std functions in SYCL device. - return (Callee->getName() == LibstdcxxFailedAssertion) && - Callee->getNumParams() == 0 && Callee->getReturnType()->isVoidType() && - SrcMgr.isInSystemHeader(Callee->getLocation()); -} - -// Helper function to report conflicting function attributes. -// F - the function, A1 - function attribute, A2 - the attribute it conflicts -// with. 
-static void reportConflictingAttrs(SemaSYCL &S, FunctionDecl *F, const Attr *A1, - const Attr *A2) { - S.Diag(F->getLocation(), diag::err_conflicting_sycl_kernel_attributes); - S.Diag(A1->getLocation(), diag::note_conflicting_attribute); - S.Diag(A2->getLocation(), diag::note_conflicting_attribute); - F->setInvalidDecl(); -} - -/// Returns the signed constant integer value represented by given expression -static int64_t getIntExprValue(const Expr *E, ASTContext &Ctx) { - return E->getIntegerConstantExpr(Ctx)->getSExtValue(); -} - -// Collect function attributes related to SYCL. -static void collectSYCLAttributes(SemaSYCL &S, FunctionDecl *FD, - llvm::SmallVectorImpl &Attrs, - bool DirectlyCalled) { - if (!FD->hasAttrs()) - return; - - // In SYCL 2020 mode, the attributes aren't propagated from the function they - // are applied on to the kernel which calls the function. - if (DirectlyCalled) { - llvm::copy_if(FD->getAttrs(), std::back_inserter(Attrs), [](Attr *A) { - // FIXME: Make this list self-adapt as new SYCL attributes are added. - return isa(A); - }); - } -} - -class DiagDeviceFunction : public RecursiveASTVisitor { - SemaSYCL &SemaSYCLRef; - const llvm::SmallPtrSetImpl &RecursiveFuncs; - -public: - DiagDeviceFunction( - SemaSYCL &S, - const llvm::SmallPtrSetImpl &RecursiveFuncs) - : RecursiveASTVisitor(), SemaSYCLRef(S), RecursiveFuncs(RecursiveFuncs) {} - - void CheckBody(Stmt *ToBeDiagnosed) { TraverseStmt(ToBeDiagnosed); } - - bool VisitCallExpr(CallExpr *e) { - if (FunctionDecl *Callee = e->getDirectCallee()) { - Callee = Callee->getCanonicalDecl(); - assert(Callee && "Device function canonical decl must be available"); - - // Remember that all SYCL kernel functions have deferred - // instantiation as template functions. It means that - // all functions used by kernel have already been parsed and have - // definitions. 
- if (RecursiveFuncs.count(Callee)) { - SemaSYCLRef.Diag(e->getExprLoc(), diag::err_sycl_restrict) - << SemaSYCL::KernelCallRecursiveFunction; - SemaSYCLRef.Diag(Callee->getSourceRange().getBegin(), - diag::note_sycl_recursive_function_declared_here) - << SemaSYCL::KernelCallRecursiveFunction; - } - - // Specifically check if the math library function corresponding to this - // builtin is supported for SYCL - unsigned BuiltinID = Callee->getBuiltinID(); - if (BuiltinID && !IsSyclMathFunc(BuiltinID)) { - StringRef Name = - SemaSYCLRef.getASTContext().BuiltinInfo.getName(BuiltinID); - SemaSYCLRef.Diag(e->getExprLoc(), diag::err_builtin_target_unsupported) - << Name << "SYCL device"; - } - } else if (!SemaSYCLRef.getLangOpts().SYCLAllowFuncPtr && - !e->isTypeDependent() && - !isa(e->getCallee())) { - bool MaybeConstantExpr = false; - Expr *NonDirectCallee = e->getCallee(); - if (!NonDirectCallee->isValueDependent()) - MaybeConstantExpr = - NonDirectCallee->isCXX11ConstantExpr(SemaSYCLRef.getASTContext()); - if (!MaybeConstantExpr) - SemaSYCLRef.Diag(e->getExprLoc(), diag::err_sycl_restrict) - << SemaSYCL::KernelCallFunctionPointer; - } - return true; - } - - bool VisitCXXTypeidExpr(CXXTypeidExpr *E) { - SemaSYCLRef.Diag(E->getExprLoc(), diag::err_sycl_restrict) - << SemaSYCL::KernelRTTI; - return true; - } - - bool VisitCXXDynamicCastExpr(const CXXDynamicCastExpr *E) { - SemaSYCLRef.Diag(E->getExprLoc(), diag::err_sycl_restrict) - << SemaSYCL::KernelRTTI; - return true; - } - - // Skip checking rules on variables initialized during constant evaluation. - bool TraverseVarDecl(VarDecl *VD) { - if (VD->isConstexpr()) - return true; - return RecursiveASTVisitor::TraverseVarDecl(VD); - } - - // Skip checking rules on template arguments, since these are constant - // expressions. - bool TraverseTemplateArgumentLoc(const TemplateArgumentLoc &ArgLoc) { - return true; - } - - // Skip checking the static assert, both components are required to be - // constant expressions. 
- bool TraverseStaticAssertDecl(StaticAssertDecl *D) { return true; } - - // Make sure we skip the condition of the case, since that is a constant - // expression. - bool TraverseCaseStmt(CaseStmt *S) { - return TraverseStmt(S->getSubStmt()); - } - - // Skip checking the size expr, since a constant array type loc's size expr is - // a constant expression. - bool TraverseConstantArrayTypeLoc(const ConstantArrayTypeLoc &ArrLoc) { - return true; - } - - bool TraverseIfStmt(IfStmt *S) { - if (std::optional ActiveStmt = - S->getNondiscardedCase(SemaSYCLRef.getASTContext())) { - if (*ActiveStmt) - return TraverseStmt(*ActiveStmt); - return true; - } - return RecursiveASTVisitor::TraverseIfStmt(S); - } -}; - -// This type manages the list of device functions and recursive functions, as -// well as an entry point for attribute collection, for the translation unit -// during MarkDevices. On construction, this type makes sure that all of the -// root-device functions, (that is, those marked with SYCL_EXTERNAL) are -// collected. On destruction, it manages and runs the diagnostics required. -// When processing individual kernel/external functions, the -// SingleDeviceFunctionTracker type updates this type. -class DeviceFunctionTracker { - friend class SingleDeviceFunctionTracker; - CallGraph CG; - SemaSYCL &SemaSYCLRef; - // The list of functions used on the device, kept so we can diagnose on them - // later. 
- llvm::SmallPtrSet DeviceFunctions; - llvm::SmallPtrSet RecursiveFunctions; - - void CollectSyclExternalFuncs() { - for (CallGraphNode::CallRecord Record : CG.getRoot()->callees()) - if (auto *FD = dyn_cast(Record.Callee->getDecl())) - if (FD->hasBody() && FD->hasAttr()) - SemaSYCLRef.addSyclDeviceDecl(FD); - } - - CallGraphNode *getNodeForKernel(FunctionDecl *Kernel) { - assert(CG.getNode(Kernel) && "No call graph entry for a kernel?"); - return CG.getNode(Kernel); - } - - void AddSingleFunction( - const llvm::SmallPtrSetImpl &DevFuncs, - const llvm::SmallPtrSetImpl &Recursive) { - DeviceFunctions.insert(DevFuncs.begin(), DevFuncs.end()); - RecursiveFunctions.insert(Recursive.begin(), Recursive.end()); - } - -public: - DeviceFunctionTracker(SemaSYCL &S) : SemaSYCLRef(S) { - CG.setSkipConstantExpressions(S.getASTContext()); - CG.addToCallGraph(S.getASTContext().getTranslationUnitDecl()); - CollectSyclExternalFuncs(); - } - - ~DeviceFunctionTracker() { - DiagDeviceFunction Diagnoser{SemaSYCLRef, RecursiveFunctions}; - for (const FunctionDecl *FD : DeviceFunctions) - if (const FunctionDecl *Def = FD->getDefinition()) - Diagnoser.CheckBody(Def->getBody()); - } -}; - -/// This function checks whether given DeclContext contains a topmost -/// namespace with name "sycl". -static bool isDeclaredInSYCLNamespace(const Decl *D) { - const DeclContext *DC = D->getDeclContext()->getEnclosingNamespaceContext(); - const auto *ND = dyn_cast(DC); - // If this is not a namespace, then we are done. - if (!ND) - return false; - - // While it is a namespace, find its parent scope. 
- while (const DeclContext *Parent = ND->getParent()) { - if (!isa(Parent)) - break; - ND = cast(Parent); - } - - return ND && ND->getName() == "sycl"; -} - -static bool isSYCLPrivateMemoryVar(VarDecl *VD) { - return SemaSYCL::isSyclType(VD->getType(), SYCLTypeAttr::private_memory); -} - -static void addScopeAttrToLocalVars(FunctionDecl &F) { - for (Decl *D : F.decls()) { - VarDecl *VD = dyn_cast(D); - - if (!VD || isa(VD) || - VD->getStorageDuration() != StorageDuration::SD_Automatic) - continue; - // Local variables of private_memory type in the WG scope still have WI - // scope, all the rest - WG scope. Simple logic - // "if no scope than it is WG scope" won't work, because compiler may add - // locals not declared in user code (lambda object parameter, byval - // arguments) which will result in alloca w/o any attribute, so need WI - // scope too. - SYCLScopeAttr::Level L = isSYCLPrivateMemoryVar(VD) - ? SYCLScopeAttr::Level::WorkItem - : SYCLScopeAttr::Level::WorkGroup; - VD->addAttr(SYCLScopeAttr::CreateImplicit(F.getASTContext(), L)); - } -} - -// This type does the heavy lifting for the management of device functions, -// recursive function detection, and attribute collection for a single -// kernel/external function. It walks the callgraph to find all functions that -// are called, marks the recursive-functions, and figures out the list of -// attributes that apply to this kernel. -// -// Upon destruction, this type updates the DeviceFunctionTracker. 
-class SingleDeviceFunctionTracker { - DeviceFunctionTracker &Parent; - FunctionDecl *SYCLKernel = nullptr; - FunctionDecl *KernelBody = nullptr; - llvm::SmallPtrSet DeviceFunctions; - llvm::SmallPtrSet RecursiveFunctions; - llvm::SmallVector CollectedAttributes; - - FunctionDecl *GetFDFromNode(CallGraphNode *Node) { - FunctionDecl *FD = Node->getDecl()->getAsFunction(); - if (!FD) - return nullptr; - - return FD->getMostRecentDecl(); - } - - void VisitCallNode(CallGraphNode *Node, FunctionDecl *CurrentDecl, - llvm::SmallVectorImpl &CallStack) { - // If this isn't a function, I don't think there is anything we can do here. - if (!CurrentDecl) - return; - - // Determine if this is a recursive function. If so, we're done. - if (llvm::is_contained(CallStack, CurrentDecl)) { - RecursiveFunctions.insert(CurrentDecl->getCanonicalDecl()); - return; - } - - // If this is a routine that is not defined and it does not have either - // a SYCLKernel or SYCLDevice attribute on it, add it to the set of - // routines potentially reachable on device. This is to diagnose such - // cases later in finalizeSYCLDelayedAnalysis(). - if (!CurrentDecl->isDefined() && !CurrentDecl->hasAttr() && - !CurrentDecl->hasAttr()) - Parent.SemaSYCLRef.addFDToReachableFromSyclDevice(CurrentDecl, - CallStack.back()); - - // If this is a parallel_for_work_item that is declared in the - // sycl namespace, mark it with the WorkItem scope attribute. - // Note: Here, we assume that this is called from within a - // parallel_for_work_group; it is undefined to call it otherwise. - // We deliberately do not diagnose a violation. - // The following changes have also been added: - // 1. The function inside which the parallel_for_work_item exists is - // marked with WorkGroup scope attribute, if not present already. - // 2. The local variables inside the function are marked with appropriate - // scope. 
- if (CurrentDecl->getIdentifier() && - CurrentDecl->getIdentifier()->getName() == "parallel_for_work_item" && - isDeclaredInSYCLNamespace(CurrentDecl) && - !CurrentDecl->hasAttr()) { - CurrentDecl->addAttr(SYCLScopeAttr::CreateImplicit( - Parent.SemaSYCLRef.getASTContext(), SYCLScopeAttr::Level::WorkItem)); - FunctionDecl *Caller = CallStack.back(); - if (!Caller->hasAttr()) { - Caller->addAttr( - SYCLScopeAttr::CreateImplicit(Parent.SemaSYCLRef.getASTContext(), - SYCLScopeAttr::Level::WorkGroup)); - addScopeAttrToLocalVars(*Caller); - } - } - - // We previously thought we could skip this function if we'd seen it before, - // but if we haven't seen it before in this call graph, we can end up - // missing a recursive call. SO, we have to revisit call-graphs we've - // already seen, just in case it ALSO has recursion. For example: - // void recurse1(); - // void recurse2() { recurse1(); } - // void recurse1() { recurse2(); } - // void CallerInKernel() { recurse1(); recurse2(); } - // When checking 'recurse1', we'd have ended up 'visiting' recurse2 without - // realizing it was recursive, since we never went into the - // child-of-its-child, since THAT was recursive and exited early out of - // necessity. - // Then when we go to visit the kernel's call to recurse2, we would - // immediately escape not noticing it was recursive. SO, we have to do a - // little extra work in this case, and make sure we visit the entire call - // graph. - DeviceFunctions.insert(CurrentDecl); - - // Collect attributes for functions that aren't the root kernel. - if (!CallStack.empty()) { - bool DirectlyCalled = CallStack.size() == 1; - collectSYCLAttributes(Parent.SemaSYCLRef, CurrentDecl, - CollectedAttributes, DirectlyCalled); - } - - // Calculate the kernel body. Note the 'isSYCLKernelBodyFunction' only - // tests that it is operator(), so hopefully this doesn't get us too many - // false-positives. 
- if (isSYCLKernelBodyFunction(CurrentDecl)) { - // This is a direct callee of the kernel. - if (CallStack.size() == 1 && - CallStack.back()->hasAttr()) { - assert(!KernelBody && "inconsistent call graph - only one kernel body " - "function can be called"); - KernelBody = CurrentDecl; - } else if (CallStack.size() == 2 && KernelBody == CallStack.back()) { - // To implement rounding-up of a parallel-for range the - // SYCL header implementation modifies the kernel call like this: - // auto Wrapper = [=](TransformedArgType Arg) { - // if (Arg[0] >= NumWorkItems[0]) - // return; - // Arg.set_allowed_range(NumWorkItems); - // KernelFunc(Arg); - // }; - // - // This transformation leads to a condition where a kernel body - // function becomes callable from a new kernel body function. - // Hence this test. - // FIXME: We need to be more selective here, this can be hit by simply - // having a kernel lambda with a lambda call inside of it. - KernelBody = CurrentDecl; - } - if (KernelBody) - Parent.SemaSYCLRef.addSYCLKernelFunction(KernelBody); - } - - // Recurse. - CallStack.push_back(CurrentDecl); - llvm::SmallPtrSet SeenCallees; - for (CallGraphNode *CI : Node->callees()) { - FunctionDecl *CurFD = GetFDFromNode(CI); - - // Make sure we only visit each callee 1x from this function to avoid very - // time consuming template recursion cases. - if (!llvm::is_contained(SeenCallees, CurFD)) { - VisitCallNode(CI, CurFD, CallStack); - SeenCallees.insert(CurFD); - } - } - CallStack.pop_back(); - } - - // Function to walk the call graph and identify the important information. - void Init() { - CallGraphNode *KernelNode = Parent.getNodeForKernel(SYCLKernel); - llvm::SmallVector CallStack; - VisitCallNode(KernelNode, GetFDFromNode(KernelNode), CallStack); - - // Always inline the KernelBody in the kernel entry point. For ESIMD - // inlining is handled later down the pipeline. 
- if (KernelBody && - Parent.SemaSYCLRef.getLangOpts().SYCLForceInlineKernelLambda && - !KernelBody->hasAttr() && - !KernelBody->hasAttr() && - !KernelBody->hasAttr()) { - KernelBody->addAttr(AlwaysInlineAttr::CreateImplicit( - KernelBody->getASTContext(), {}, AlwaysInlineAttr::Keyword_forceinline)); - } - } - -public: - SingleDeviceFunctionTracker(DeviceFunctionTracker &P, Decl *Kernel) - : Parent(P), SYCLKernel(Kernel->getAsFunction()) { - Init(); - } - - FunctionDecl *GetSYCLKernel() { return SYCLKernel; } - - FunctionDecl *GetKernelBody() { return KernelBody; } - - llvm::SmallVectorImpl &GetCollectedAttributes() { - return CollectedAttributes; - } - - llvm::SmallPtrSetImpl &GetDeviceFunctions() { - return DeviceFunctions; - } - - ~SingleDeviceFunctionTracker() { - Parent.AddSingleFunction(DeviceFunctions, RecursiveFunctions); - } -}; - -class KernelBodyTransform : public TreeTransform { -public: - KernelBodyTransform(std::pair &MPair, - Sema &S) - : TreeTransform(S), MappingPair(MPair), SemaRef(S) {} - bool AlwaysRebuild() { return true; } - - ExprResult TransformDeclRefExpr(DeclRefExpr *DRE) { - auto Ref = dyn_cast(DRE->getDecl()); - if (Ref && Ref == MappingPair.first) { - auto NewDecl = MappingPair.second; - return DeclRefExpr::Create( - SemaRef.getASTContext(), DRE->getQualifierLoc(), - DRE->getTemplateKeywordLoc(), NewDecl, false, DRE->getNameInfo(), - NewDecl->getType(), DRE->getValueKind()); - } - return DRE; - } - -private: - std::pair MappingPair; - Sema &SemaRef; -}; - -/// Creates a kernel parameter descriptor -/// \param Src field declaration to construct name from -/// \param Ty the desired parameter type -/// \return the constructed descriptor -static ParamDesc makeParamDesc(const FieldDecl *Src, QualType Ty) { - ASTContext &Ctx = Src->getASTContext(); - std::string Name = (Twine("_arg_") + Src->getName()).str(); - return std::make_tuple(Ty, &Ctx.Idents.get(Name), - Ctx.getTrivialTypeSourceInfo(Ty)); -} -static ParamDesc makeParamDesc(const 
ParmVarDecl *Src, QualType Ty) { - ASTContext &Ctx = Src->getASTContext(); - std::string Name = (Twine("__arg_") + Src->getName()).str(); - return std::make_tuple(Ty, &Ctx.Idents.get(Name), - Ctx.getTrivialTypeSourceInfo(Ty)); -} - -static ParamDesc makeParamDesc(ASTContext &Ctx, StringRef Name, QualType Ty) { - return std::make_tuple(Ty, &Ctx.Idents.get(Name), - Ctx.getTrivialTypeSourceInfo(Ty)); -} - -static void unsupportedFreeFunctionParamType() { - llvm::report_fatal_error("Only scalars and pointers are permitted as " - "free function parameters"); -} - -class MarkWIScopeFnVisitor : public RecursiveASTVisitor { -public: - MarkWIScopeFnVisitor(ASTContext &Ctx) : Ctx(Ctx) {} - - bool VisitCXXMemberCallExpr(CXXMemberCallExpr *Call) { - FunctionDecl *Callee = Call->getDirectCallee(); - if (!Callee) - // not a direct call - continue search - return true; - QualType Ty = Ctx.getRecordType(Call->getRecordDecl()); - if (!SemaSYCL::isSyclType(Ty, SYCLTypeAttr::group)) - // not a member of sycl::group - continue search - return true; - auto Name = Callee->getName(); - if (Name != "wait_for" || - Callee->hasAttr()) - return true; - // it is a call to sycl::group::wait_for - mark the callee - Callee->addAttr( - SYCLScopeAttr::CreateImplicit(Ctx, SYCLScopeAttr::Level::WorkItem)); - // continue search as there can be other wait_for calls - return true; - } - -private: - ASTContext &Ctx; -}; - -/// Return method by name -static CXXMethodDecl *getMethodByName(const CXXRecordDecl *CRD, - StringRef MethodName) { - CXXMethodDecl *Method; - auto It = std::find_if(CRD->methods().begin(), CRD->methods().end(), - [MethodName](const CXXMethodDecl *Method) { - return Method->getNameAsString() == MethodName; - }); - Method = (It != CRD->methods().end()) ? 
*It : nullptr; - return Method; -} - -static KernelInvocationKind -getKernelInvocationKind(FunctionDecl *KernelCallerFunc) { - return llvm::StringSwitch(KernelCallerFunc->getName()) - .Case("kernel_single_task", InvokeSingleTask) - .Case("kernel_parallel_for", InvokeParallelFor) - .Case("kernel_parallel_for_work_group", InvokeParallelForWorkGroup) - .Default(InvokeUnknown); -} - -// The SYCL kernel's 'object type' used for diagnostics and naming/mangling is -// the first parameter to a function template using the sycl_kernel -// attribute. In SYCL 1.2.1, this was passed by value, -// and in SYCL 2020, it is passed by reference. -static QualType GetSYCLKernelObjectType(const FunctionDecl *KernelCaller) { - assert(KernelCaller->getNumParams() > 0 && "Insufficient kernel parameters"); - QualType KernelParamTy = KernelCaller->getParamDecl(0)->getType(); - - // SYCL 2020 kernels are passed by reference. - if (KernelParamTy->isReferenceType()) - KernelParamTy = KernelParamTy->getPointeeType(); - - // SYCL 1.2.1 - return KernelParamTy.getUnqualifiedType(); -} - -/// \return the target of given SYCL accessor type -static target getAccessTarget(QualType FieldTy, - const ClassTemplateSpecializationDecl *AccTy) { - if (SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::local_accessor)) - return local; - - return static_cast( - AccTy->getTemplateArgs()[3].getAsIntegral().getExtValue()); -} - -// FIXME: Free functions must have void return type and be declared at file -// scope, outside any namespaces. 
-static bool isFreeFunction(SemaSYCL &SemaSYCLRef, const FunctionDecl *FD) { - for (auto *IRAttr : FD->specific_attrs()) { - SmallVector, 4> NameValuePairs = - IRAttr->getAttributeNameValuePairs(SemaSYCLRef.getASTContext()); - for (const auto &NameValuePair : NameValuePairs) { - if (NameValuePair.first == "sycl-nd-range-kernel" || - NameValuePair.first == "sycl-single-task-kernel") { - if (!FD->getReturnType()->isVoidType()) { - llvm::report_fatal_error( - "Only functions at file scope with void return " - "type are permitted as free functions"); - return false; - } - return true; - } - } - } - return false; -} - -static int getFreeFunctionRangeDim(SemaSYCL &SemaSYCLRef, - const FunctionDecl *FD) { - for (auto *IRAttr : FD->specific_attrs()) { - SmallVector, 4> NameValuePairs = - IRAttr->getAttributeNameValuePairs(SemaSYCLRef.getASTContext()); - for (const auto &NameValuePair : NameValuePairs) { - if (NameValuePair.first == "sycl-nd-range-kernel") - return std::stoi(NameValuePair.second); - if (NameValuePair.first == "sycl-single-task-kernel") - return 0; - } - } - return false; -} - -// Creates a name for the free function kernel function. -// Consider a free function named "MyFunction". The normal device function will -// be given its mangled name, say "_Z10MyFunctionIiEvPT_S0_". The corresponding -// kernel function for this free function will be named -// "_Z24__sycl_kernel_MyFunctionIiEvPT_S0_". This is the mangled name of a -// fictitious function that has the same template and function parameters as the -// original free function but with identifier prefixed with __sycl_kernel_. -// We generate this name by starting with the mangled name of the free function -// and adjusting it textually to simulate the __sycl_kernel_ prefix. -// Because free functions are allowed only at file scope and cannot be within -// namespaces the mangled name has the format _Z... where -// length is the identifier's length. 
The text manipulation inserts the prefix -// __sycl_kernel_ and adjusts the length, leaving the rest of the name as-is. -static std::pair constructFreeFunctionKernelName( - SemaSYCL &SemaSYCLRef, const FunctionDecl *FreeFunc, MangleContext &MC) { - SmallString<256> Result; - llvm::raw_svector_ostream Out(Result); - std::string NewName; - std::string StableName; - - // Handle extern "C" - if (FreeFunc->getLanguageLinkage() == CLanguageLinkage) { - const IdentifierInfo *II = FreeFunc->getIdentifier(); - NewName = "__sycl_kernel_" + II->getName().str(); - } else { - MC.mangleName(FreeFunc, Out); - std::string MangledName(Out.str()); - size_t StartNums = MangledName.find_first_of("0123456789"); - size_t EndNums = MangledName.find_first_not_of("0123456789", StartNums); - size_t NameLength = - std::stoi(MangledName.substr(StartNums, EndNums - StartNums)); - size_t NewNameLength = 14 /*length of __sycl_kernel_*/ + NameLength; - NewName = MangledName.substr(0, StartNums) + std::to_string(NewNameLength) + - "__sycl_kernel_" + MangledName.substr(EndNums); - } - StableName = NewName; - return {NewName, StableName}; -} - -// The first template argument to the kernel caller function is used to identify -// the kernel itself. -static QualType calculateKernelNameType(ASTContext &Ctx, - const FunctionDecl *KernelCallerFunc) { - const TemplateArgumentList *TAL = - KernelCallerFunc->getTemplateSpecializationArgs(); - assert(TAL && "No template argument info"); - return TAL->get(0).getAsType().getCanonicalType(); -} - -// Gets a name for the OpenCL kernel function, calculated from the first -// template argument of the kernel caller function. 
-static std::pair -constructKernelName(SemaSYCL &S, const FunctionDecl *KernelCallerFunc, - MangleContext &MC) { - QualType KernelNameType = - calculateKernelNameType(S.getASTContext(), KernelCallerFunc); - - SmallString<256> Result; - llvm::raw_svector_ostream Out(Result); - - MC.mangleCanonicalTypeName(KernelNameType, Out); - std::string MangledName(Out.str()); - - std::string StableName = - SYCLUniqueStableNameExpr::ComputeName(S.getASTContext(), KernelNameType); - - // For NativeCPU the kernel name is set to the stable GNU-mangled name - // because the default mangling may be different, for example on Windows. - // This is needed for compiling kernels for multiple SYCL targets to ensure - // the same kernel name can be used for kernel lookup in different target - // binaries. This assumes that all SYCL targets use the same mangling - // produced for the stable name. - // Todo: Check if this assumption is valid, and if it would be better - // instead to always compile the NativeCPU device code in GNU mode which - // may cause issues when compiling headers with non-standard extensions - // written for compilers with different C++ ABIs (like MS VS). 
- if (S.getLangOpts().SYCLIsNativeCPU) { - MangledName = StableName; - } - - return {MangledName, StableName}; -} - -static bool isDefaultSPIRArch(ASTContext &Context) { - llvm::Triple T = Context.getTargetInfo().getTriple(); - if (T.isSPIR() && T.getSubArch() == llvm::Triple::NoSubArch) - return true; - return false; -} - -static ParmVarDecl *getSyclKernelHandlerArg(FunctionDecl *KernelCallerFunc) { - // Specialization constants in SYCL 2020 are not captured by lambda and - // accessed through new optional lambda argument kernel_handler - auto IsHandlerLambda = [](ParmVarDecl *PVD) { - return SemaSYCL::isSyclType(PVD->getType(), SYCLTypeAttr::kernel_handler); - }; - - assert(llvm::count_if(KernelCallerFunc->parameters(), IsHandlerLambda) <= 1 && - "Multiple kernel_handler parameters"); - - auto KHArg = llvm::find_if(KernelCallerFunc->parameters(), IsHandlerLambda); - - return (KHArg != KernelCallerFunc->param_end()) ? *KHArg : nullptr; -} - -static bool isReadOnlyAccessor(const TemplateArgument &AccessModeArg) { - const auto *AccessModeArgEnumType = - AccessModeArg.getIntegralType()->castAs(); - const EnumDecl *ED = AccessModeArgEnumType->getDecl(); - - auto ReadOnly = - llvm::find_if(ED->enumerators(), [&](const EnumConstantDecl *E) { - return E->getName() == "read"; - }); - - return ReadOnly != ED->enumerator_end() && - (*ReadOnly)->getInitVal() == AccessModeArg.getAsIntegral(); -} - -// anonymous namespace so these don't get linkage. -namespace { - -template struct bind_param { using type = T; }; - -template <> struct bind_param { - using type = const CXXBaseSpecifier &; -}; - -template <> struct bind_param { using type = FieldDecl *; }; - -template <> struct bind_param { using type = FieldDecl *; }; - -template using bind_param_t = typename bind_param::type; - -class KernelObjVisitor { - SemaSYCL &SemaSYCLRef; - - template - void VisitUnionImpl(const CXXRecordDecl *Owner, ParentTy &Parent, - const CXXRecordDecl *Wrapper, HandlerTys &... 
Handlers) { - (void)std::initializer_list{ - (Handlers.enterUnion(Owner, Parent), 0)...}; - VisitRecordHelper(Wrapper, Wrapper->fields(), Handlers...); - (void)std::initializer_list{ - (Handlers.leaveUnion(Owner, Parent), 0)...}; - } - - // These enable handler execution only when previous Handlers succeed. - template - bool handleField(FieldDecl *FD, QualType FDTy, Tn &&... tn) { - bool result = true; - (void)std::initializer_list{(result = result && tn(FD, FDTy), 0)...}; - return result; - } - template - bool handleField(const CXXBaseSpecifier &BD, QualType BDTy, Tn &&... tn) { - bool result = true; - std::initializer_list{(result = result && tn(BD, BDTy), 0)...}; - return result; - } - -// This definition using std::bind is necessary because of a gcc 7.x bug. -#define KF_FOR_EACH(FUNC, Item, Qt) \ - handleField( \ - Item, Qt, \ - std::bind(static_cast::*)( \ - bind_param_t, QualType)>( \ - &std::decay_t::FUNC), \ - std::ref(Handlers), _1, _2)...) - - // The following simpler definition works with gcc 8.x and later. - //#define KF_FOR_EACH(FUNC) \ -// handleField(Field, FieldTy, ([&](FieldDecl *FD, QualType FDTy) { \ -// return Handlers.f(FD, FDTy); \ -// })...) - - // This enables handler execution only when previous Handlers succeed. - template - bool handleParam(ParmVarDecl *PD, QualType PDTy, Tn &&...tn) { - bool result = true; - (void)std::initializer_list{(result = result && tn(PD, PDTy), 0)...}; - return result; - } - - // This definition using std::bind is necessary because of a gcc 7.x bug. -#define KP_FOR_EACH(FUNC, Item, Qt) \ - handleParam( \ - Item, Qt, \ - std::bind(static_cast::*)( \ - bind_param_t, QualType)>( \ - &std::decay_t::FUNC), \ - std::ref(Handlers), _1, _2)...) - - // Parent contains the FieldDecl or CXXBaseSpecifier that was used to enter - // the Wrapper structure that we're currently visiting. 
Owner is the parent - // type (which doesn't exist in cases where it is a FieldDecl in the - // 'root'), and Wrapper is the current struct being unwrapped. - template - void visitComplexRecord(const CXXRecordDecl *Owner, ParentTy &Parent, - const CXXRecordDecl *Wrapper, QualType RecordTy, - HandlerTys &... Handlers) { - (void)std::initializer_list{ - (Handlers.enterStruct(Owner, Parent, RecordTy), 0)...}; - VisitRecordHelper(Wrapper, Wrapper->bases(), Handlers...); - VisitRecordHelper(Wrapper, Wrapper->fields(), Handlers...); - (void)std::initializer_list{ - (Handlers.leaveStruct(Owner, Parent, RecordTy), 0)...}; - } - - template - void visitSimpleRecord(const CXXRecordDecl *Owner, ParentTy &Parent, - const CXXRecordDecl *Wrapper, QualType RecordTy, - HandlerTys &... Handlers) { - (void)std::initializer_list{ - (Handlers.handleNonDecompStruct(Owner, Parent, RecordTy), 0)...}; - } - - template - void visitRecord(const CXXRecordDecl *Owner, ParentTy &Parent, - const CXXRecordDecl *Wrapper, QualType RecordTy, - HandlerTys &... Handlers); - - template - void VisitUnion(const CXXRecordDecl *Owner, ParentTy &Parent, - const CXXRecordDecl *Wrapper, HandlerTys &... Handlers); - - template - void VisitRecordHelper(const CXXRecordDecl *Owner, - clang::CXXRecordDecl::base_class_const_range Range, - HandlerTys &... Handlers) { - for (const auto &Base : Range) { - QualType BaseTy = Base.getType(); - // Handle accessor class as base - if (isSyclSpecialType(BaseTy, SemaSYCLRef)) - (void)std::initializer_list{ - (Handlers.handleSyclSpecialType(Owner, Base, BaseTy), 0)...}; - else - // For all other bases, visit the record - visitRecord(Owner, Base, BaseTy->getAsCXXRecordDecl(), BaseTy, - Handlers...); - } - } - - template - void VisitRecordHelper(const CXXRecordDecl *Owner, - RecordDecl::field_range Range, - HandlerTys &... 
Handlers) { - VisitRecordFields(Owner, Handlers...); - } - - template - void visitArrayElementImpl(const CXXRecordDecl *Owner, FieldDecl *ArrayField, - QualType ElementTy, uint64_t Index, - HandlerTys &... Handlers) { - (void)std::initializer_list{ - (Handlers.nextElement(ElementTy, Index), 0)...}; - visitField(Owner, ArrayField, ElementTy, Handlers...); - } - - template - void visitFirstArrayElement(const CXXRecordDecl *Owner, FieldDecl *ArrayField, - QualType ElementTy, HandlerTys &... Handlers) { - visitArrayElementImpl(Owner, ArrayField, ElementTy, 0, Handlers...); - } - template - void visitNthArrayElement(const CXXRecordDecl *Owner, FieldDecl *ArrayField, - QualType ElementTy, uint64_t Index, - HandlerTys &... Handlers); - - template - void visitSimpleArray(const CXXRecordDecl *Owner, FieldDecl *Field, - QualType ArrayTy, HandlerTys &... Handlers) { - (void)std::initializer_list{ - (Handlers.handleSimpleArrayType(Field, ArrayTy), 0)...}; - } - - template - void visitComplexArray(const CXXRecordDecl *Owner, FieldDecl *Field, - QualType ArrayTy, HandlerTys &... Handlers) { - // Array workflow is: - // handleArrayType - // enterArray - // nextElement - // VisitField (same as before, note that The FieldDecl is the of array - // itself, not the element) - // ... repeat per element, opt-out for duplicates. 
- // leaveArray - - if (!KF_FOR_EACH(handleArrayType, Field, ArrayTy)) - return; - - const ConstantArrayType *CAT = - SemaSYCLRef.getASTContext().getAsConstantArrayType(ArrayTy); - assert(CAT && "Should only be called on constant-size array."); - QualType ET = CAT->getElementType(); - uint64_t ElemCount = CAT->getSize().getZExtValue(); - - (void)std::initializer_list{ - (Handlers.enterArray(Field, ArrayTy, ET), 0)...}; - - visitFirstArrayElement(Owner, Field, ET, Handlers...); - for (uint64_t Index = 1; Index < ElemCount; ++Index) - visitNthArrayElement(Owner, Field, ET, Index, Handlers...); - - (void)std::initializer_list{ - (Handlers.leaveArray(Field, ArrayTy, ET), 0)...}; - } - - template - void visitField(const CXXRecordDecl *Owner, FieldDecl *Field, - QualType FieldTy, HandlerTys &... Handlers) { - if (isSyclSpecialType(FieldTy, SemaSYCLRef)) - KF_FOR_EACH(handleSyclSpecialType, Field, FieldTy); - else if (FieldTy->isStructureOrClassType()) { - if (KF_FOR_EACH(handleStructType, Field, FieldTy)) { - CXXRecordDecl *RD = FieldTy->getAsCXXRecordDecl(); - visitRecord(Owner, Field, RD, FieldTy, Handlers...); - } - } else if (FieldTy->isUnionType()) { - if (KF_FOR_EACH(handleUnionType, Field, FieldTy)) { - CXXRecordDecl *RD = FieldTy->getAsCXXRecordDecl(); - VisitUnion(Owner, Field, RD, Handlers...); - } - } else if (FieldTy->isReferenceType()) - KF_FOR_EACH(handleReferenceType, Field, FieldTy); - else if (FieldTy->isPointerType()) - KF_FOR_EACH(handlePointerType, Field, FieldTy); - else if (FieldTy->isArrayType()) - visitArray(Owner, Field, FieldTy, Handlers...); - else if (FieldTy->isScalarType() || FieldTy->isVectorType()) - KF_FOR_EACH(handleScalarType, Field, FieldTy); - else - KF_FOR_EACH(handleOtherType, Field, FieldTy); - } - - template - void visitParam(ParmVarDecl *Param, QualType ParamTy, - HandlerTys &...Handlers) { - if (isSyclSpecialType(ParamTy, SemaSYCLRef)) - KP_FOR_EACH(handleOtherType, Param, ParamTy); - else if (ParamTy->isStructureOrClassType()) 
{ - if (KP_FOR_EACH(handleStructType, Param, ParamTy)) { - CXXRecordDecl *RD = ParamTy->getAsCXXRecordDecl(); - visitRecord(nullptr, Param, RD, ParamTy, Handlers...); - } - } else if (ParamTy->isUnionType()) - KP_FOR_EACH(handleOtherType, Param, ParamTy); - else if (ParamTy->isReferenceType()) - KP_FOR_EACH(handleOtherType, Param, ParamTy); - else if (ParamTy->isPointerType()) - KP_FOR_EACH(handlePointerType, Param, ParamTy); - else if (ParamTy->isArrayType()) - KP_FOR_EACH(handleOtherType, Param, ParamTy); - else if (ParamTy->isScalarType()) - KP_FOR_EACH(handleScalarType, Param, ParamTy); - else - KP_FOR_EACH(handleOtherType, Param, ParamTy); - } - -public: - KernelObjVisitor(SemaSYCL &S) : SemaSYCLRef(S) {} - - static bool useTopLevelKernelObj(const CXXRecordDecl *KernelObj) { - // If the kernel is empty, "decompose" it so we don't generate arguments. - if (KernelObj->isEmpty()) - return false; - // FIXME: Workaround to not change large number of tests - // this is covered by the test below. - if (targetRequiresNewType(KernelObj->getASTContext())) - return false; - if (KernelObj->hasAttr() || - KernelObj->hasAttr()) - return false; - return true; - } - - template - void visitTopLevelRecord(const CXXRecordDecl *Owner, QualType RecordTy, - HandlerTys &...Handlers) { - (void)std::initializer_list{ - (Handlers.handleTopLevelStruct(Owner, RecordTy), 0)...}; - } - - template - void VisitRecordBases(const CXXRecordDecl *KernelFunctor, - HandlerTys &... Handlers) { - VisitRecordHelper(KernelFunctor, KernelFunctor->bases(), Handlers...); - } - - // A visitor function that dispatches to functions as defined in - // SyclKernelFieldHandler for the purposes of kernel generation. - template - void VisitRecordFields(const CXXRecordDecl *Owner, HandlerTys &... 
Handlers) { - for (const auto Field : Owner->fields()) - visitField(Owner, Field, Field->getType(), Handlers...); - } - - template - void visitArray(const CXXRecordDecl *Owner, FieldDecl *Field, - QualType ArrayTy, HandlerTys &...Handlers); - - // A visitor for Kernel object to functions as defined in - // SyclKernelFieldHandler by iterating over fields and bases - // if they require decomposition or new type. - template - void VisitKernelRecord(const CXXRecordDecl *KernelObj, - QualType KernelFunctorTy, HandlerTys &...Handlers) { - if (!useTopLevelKernelObj(KernelObj)) { - VisitRecordBases(KernelObj, Handlers...); - VisitRecordFields(KernelObj, Handlers...); - } else { - visitTopLevelRecord(KernelObj, KernelFunctorTy, Handlers...); - } - } - - // A visitor function that dispatches to functions as defined in - // SyclKernelFieldHandler by iterating over a free function parameter list. - template - void VisitFunctionParameters(FunctionDecl *FreeFunc, - HandlerTys &...Handlers) { - for (ParmVarDecl *Param : FreeFunc->parameters()) - visitParam(Param, Param->getType(), Handlers...); - } - -#undef KF_FOR_EACH -#undef KP_FOR_EACH -}; - -// A base type that the SYCL OpenCL Kernel construction task uses to implement -// individual tasks. -class SyclKernelFieldHandlerBase { -public: - static constexpr const bool VisitUnionBody = false; - static constexpr const bool VisitNthArrayElement = true; - // Opt-in based on whether we should visit inside simple containers (structs, - // arrays). All of the 'check' types should likely be true, the int-header, - // and kernel decl creation types should not. - static constexpr const bool VisitInsideSimpleContainers = true; - static constexpr const bool VisitInsideSimpleContainersWithPointer = false; - // Mark these virtual so that we can use override in the implementer classes, - // despite virtual dispatch never being used. - - // SYCL special class can be a base class or a field decl, so both must be - // handled. 
- virtual bool handleSyclSpecialType(const CXXRecordDecl *, - const CXXBaseSpecifier &, QualType) { - return true; - } - virtual bool handleSyclSpecialType(FieldDecl *, QualType) { return true; } - virtual bool handleSyclSpecialType(ParmVarDecl *, QualType) { return true; } - - virtual bool handleStructType(FieldDecl *, QualType) { return true; } - virtual bool handleStructType(ParmVarDecl *, QualType) { return true; } - virtual bool handleUnionType(FieldDecl *, QualType) { return true; } - virtual bool handleUnionType(ParmVarDecl *, QualType) { return true; } - virtual bool handleReferenceType(FieldDecl *, QualType) { return true; } - virtual bool handleReferenceType(ParmVarDecl *, QualType) { return true; } - virtual bool handlePointerType(FieldDecl *, QualType) { return true; } - virtual bool handlePointerType(ParmVarDecl *, QualType) { return true; } - virtual bool handleArrayType(FieldDecl *, QualType) { return true; } - virtual bool handleArrayType(ParmVarDecl *, QualType) { return true; } - virtual bool handleScalarType(FieldDecl *, QualType) { return true; } - virtual bool handleScalarType(ParmVarDecl *, QualType) { return true; } - // Most handlers shouldn't be handling this, just the field checker. - virtual bool handleOtherType(FieldDecl *, QualType) { return true; } - virtual bool handleOtherType(ParmVarDecl *, QualType) { return true; } - - // Handle the SYCL kernel as a whole. This applies only when the target can - // support pointer to the generic address space as arguments and the functor - // doesn't have any SYCL special types. - virtual bool handleTopLevelStruct(const CXXRecordDecl *, QualType) { - return true; - } - - // Handle a simple struct that doesn't need to be decomposed, only called on - // handlers with VisitInsideSimpleContainers as false. Replaces - // handleStructType, enterStruct, leaveStruct, and visiting of sub-elements. 
- virtual bool handleNonDecompStruct(const CXXRecordDecl *, FieldDecl *, - QualType) { - return true; - } - - virtual bool handleNonDecompStruct(const CXXRecordDecl *, ParmVarDecl *, - QualType) { - return true; - } - - virtual bool handleNonDecompStruct(const CXXRecordDecl *, - const CXXBaseSpecifier &, QualType) { - return true; - } - - // Instead of handleArrayType, enterArray, leaveArray, and nextElement (plus - // descending down the elements), this function gets called in the event of an - // array containing simple elements (even in the case of an MD array). - virtual bool handleSimpleArrayType(FieldDecl *, QualType) { return true; } - - // The following are only used for keeping track of where we are in the base - // class/field graph. Int Headers use this to calculate offset, most others - // don't have a need for these. - - virtual bool enterStruct(const CXXRecordDecl *, FieldDecl *, QualType) { - return true; - } - virtual bool leaveStruct(const CXXRecordDecl *, FieldDecl *, QualType) { - return true; - } - virtual bool enterStruct(const CXXRecordDecl *, ParmVarDecl *, QualType) { - return true; - } - virtual bool leaveStruct(const CXXRecordDecl *, ParmVarDecl *, QualType) { - return true; - } - virtual bool enterStruct(const CXXRecordDecl *, const CXXBaseSpecifier &, - QualType) { - return true; - } - virtual bool leaveStruct(const CXXRecordDecl *, const CXXBaseSpecifier &, - QualType) { - return true; - } - virtual bool enterUnion(const CXXRecordDecl *, FieldDecl *) { return true; } - virtual bool leaveUnion(const CXXRecordDecl *, FieldDecl *) { return true; } - virtual bool enterUnion(const CXXRecordDecl *, ParmVarDecl *) { return true; } - virtual bool leaveUnion(const CXXRecordDecl *, ParmVarDecl *) { return true; } - - // The following are used for stepping through array elements. 
- virtual bool enterArray(FieldDecl *, QualType ArrayTy, QualType ElementTy) { - return true; - } - virtual bool leaveArray(FieldDecl *, QualType ArrayTy, QualType ElementTy) { - return true; - } - virtual bool enterArray(ParmVarDecl *, QualType ArrayTy, QualType ElementTy) { - return true; - } - virtual bool leaveArray(ParmVarDecl *, QualType ArrayTy, QualType ElementTy) { - return true; - } - - virtual bool nextElement(QualType, uint64_t) { return true; } - - virtual ~SyclKernelFieldHandlerBase() = default; -}; - -// A class to act as the direct base for all the SYCL OpenCL Kernel construction -// tasks that contains a reference to Sema (and potentially any other -// universally required data). -class SyclKernelFieldHandler : public SyclKernelFieldHandlerBase { -protected: - SemaSYCL &SemaSYCLRef; - SyclKernelFieldHandler(SemaSYCL &S) : SemaSYCLRef(S) {} - - // Returns 'true' if the thing we're visiting (Based on the FD/QualType pair) - // is an element of an array. FD will always be the array field. When - // traversing the array field, Ty will be the type of the array field or the - // type of array element (or some decomposed type from array). - bool isArrayElement(const FieldDecl *FD, QualType Ty) const { - return !SemaSYCLRef.getASTContext().hasSameType(FD->getType(), Ty); - } -}; - -// A class to represent the 'do nothing' case for filtering purposes. 
-class SyclEmptyHandler final : public SyclKernelFieldHandlerBase {}; -SyclEmptyHandler GlobalEmptyHandler; - -template struct HandlerFilter; -template struct HandlerFilter { - H &Handler; - HandlerFilter(H &Handler) : Handler(Handler) {} -}; -template struct HandlerFilter { - SyclEmptyHandler &Handler = GlobalEmptyHandler; - HandlerFilter(H &Handler) {} -}; - -template struct AnyTrue; - -template struct AnyTrue { static constexpr bool Value = B; }; - -template struct AnyTrue { - static constexpr bool Value = B || AnyTrue::Value; -}; - -template struct AllTrue; - -template struct AllTrue { static constexpr bool Value = B; }; - -template struct AllTrue { - static constexpr bool Value = B && AllTrue::Value; -}; - -template -void KernelObjVisitor::VisitUnion(const CXXRecordDecl *Owner, ParentTy &Parent, - const CXXRecordDecl *Wrapper, - Handlers &... handlers) { - // Don't continue descending if none of the handlers 'care'. This could be 'if - // constexpr' starting in C++17. Until then, we have to count on the - // optimizer to realize "if (false)" is a dead branch. - if (AnyTrue::Value) - VisitUnionImpl( - Owner, Parent, Wrapper, - HandlerFilter(handlers).Handler...); -} - -template -void KernelObjVisitor::visitNthArrayElement(const CXXRecordDecl *Owner, - FieldDecl *ArrayField, - QualType ElementTy, uint64_t Index, - Handlers &... handlers) { - // Don't continue descending if none of the handlers 'care'. This could be 'if - // constexpr' starting in C++17. Until then, we have to count on the - // optimizer to realize "if (false)" is a dead branch. - if (AnyTrue::Value) - visitArrayElementImpl( - Owner, ArrayField, ElementTy, Index, - HandlerFilter(handlers) - .Handler...); -} - -template -void KernelObjVisitor::visitRecord(const CXXRecordDecl *Owner, ParentTy &Parent, - const CXXRecordDecl *Wrapper, - QualType RecordTy, - HandlerTys &... 
Handlers) { - RecordDecl *RD = RecordTy->getAsRecordDecl(); - assert(RD && "should not be null."); - if (RD->hasAttr()) { - // If this container requires decomposition, we have to visit it as - // 'complex', so all handlers are called in this case with the 'complex' - // case. - visitComplexRecord(Owner, Parent, Wrapper, RecordTy, Handlers...); - } else if (AnyTrue:: - Value) { - // We are currently in PointerHandler visitor. - if (RD->hasAttr()) { - // This is a record containing pointers. - visitComplexRecord(Owner, Parent, Wrapper, RecordTy, Handlers...); - } else { - // This is a record without pointers. - visitSimpleRecord(Owner, Parent, Wrapper, RecordTy, Handlers...); - } - } else { - // "Simple" Containers are those that do NOT need to be decomposed, - // "Complex" containers are those that DO. In the case where the container - // does NOT need to be decomposed, we can call VisitSimpleRecord on the - // handlers that have opted-out of VisitInsideSimpleContainers. The 'if' - // makes sure we only do that if at least 1 has opted out. - if (!AllTrue::Value) - visitSimpleRecord( - Owner, Parent, Wrapper, RecordTy, - HandlerFilter( - Handlers) - .Handler...); - - // Even though this is a 'simple' container, some handlers (via - // VisitInsideSimpleContainers = true) need to treat it as if it needs - // decomposing, so we call VisitComplexRecord iif at least one has. - if (AnyTrue::Value) - visitComplexRecord( - Owner, Parent, Wrapper, RecordTy, - HandlerFilter( - Handlers) - .Handler...); - } -} - -template -void KernelObjVisitor::visitArray(const CXXRecordDecl *Owner, FieldDecl *Field, - QualType ArrayTy, HandlerTys &... Handlers) { - - if (Field->hasAttr()) { - visitComplexArray(Owner, Field, ArrayTy, Handlers...); - } else if (AnyTrue:: - Value) { - // We are currently in PointerHandler visitor. - if (Field->hasAttr()) { - // This is an array of pointers, or an array of a type containing - // pointers. 
- visitComplexArray(Owner, Field, ArrayTy, Handlers...); - } else { - // This is an array which does not contain pointers. - visitSimpleArray(Owner, Field, ArrayTy, Handlers...); - } - } else { - if (!AllTrue::Value) - visitSimpleArray( - Owner, Field, ArrayTy, - HandlerFilter( - Handlers) - .Handler...); - - if (AnyTrue::Value) - visitComplexArray( - Owner, Field, ArrayTy, - HandlerFilter( - Handlers) - .Handler...); - } -} - -// A type to check the validity of all of the argument types. -class SyclKernelFieldChecker : public SyclKernelFieldHandler { - bool IsInvalid = false; - DiagnosticsEngine &Diag; - // Keeps track of whether we are currently handling fields inside a struct. - // Fields of kernel functor or direct kernel captures will have a depth 0. - int StructFieldDepth = 0; - // Initialize with -1 so that fields of a base class of the kernel functor - // has depth 0. Visitor method enterStruct increments this to 0 when the base - // class is entered. - int StructBaseDepth = -1; - - // Check whether the object should be disallowed from being copied to kernel. - // Return true if not copyable, false if copyable. 
- bool checkNotCopyableToKernel(const FieldDecl *FD, QualType FieldTy) { - if (FieldTy->isArrayType()) { - if (const auto *CAT = - SemaSYCLRef.getASTContext().getAsConstantArrayType(FieldTy)) { - QualType ET = CAT->getElementType(); - return checkNotCopyableToKernel(FD, ET); - } - return Diag.Report(FD->getLocation(), - diag::err_sycl_non_constant_array_type) - << FieldTy; - } - - return false; - } - - bool checkPropertyListType(TemplateArgument PropList, SourceLocation Loc) { - if (PropList.getKind() != TemplateArgument::ArgKind::Type) - return SemaSYCLRef.Diag( - Loc, diag::err_sycl_invalid_accessor_property_template_param); - - QualType PropListTy = PropList.getAsType(); - if (!SemaSYCL::isSyclType(PropListTy, SYCLTypeAttr::accessor_property_list)) - return SemaSYCLRef.Diag( - Loc, diag::err_sycl_invalid_accessor_property_template_param); - - const auto *AccPropListDecl = - cast(PropListTy->getAsRecordDecl()); - if (AccPropListDecl->getTemplateArgs().size() != 1) - return SemaSYCLRef.Diag(Loc, - diag::err_sycl_invalid_property_list_param_number) - << "accessor_property_list"; - - const auto TemplArg = AccPropListDecl->getTemplateArgs()[0]; - if (TemplArg.getKind() != TemplateArgument::ArgKind::Pack) - return SemaSYCLRef.Diag( - Loc, - diag::err_sycl_invalid_accessor_property_list_template_param) - << /*accessor_property_list*/ 0 << /*parameter pack*/ 0; - - for (TemplateArgument::pack_iterator Prop = TemplArg.pack_begin(); - Prop != TemplArg.pack_end(); ++Prop) { - if (Prop->getKind() != TemplateArgument::ArgKind::Type) - return SemaSYCLRef.Diag( - Loc, - diag::err_sycl_invalid_accessor_property_list_template_param) - << /*accessor_property_list pack argument*/ 1 << /*type*/ 1; - QualType PropTy = Prop->getAsType(); - if (isAccessorPropertyType(PropTy, SYCLTypeAttr::buffer_location) && - checkBufferLocationType(PropTy, Loc)) - return true; - } - return false; - } - - bool checkBufferLocationType(QualType PropTy, SourceLocation Loc) { - const auto *PropDecl = - 
cast(PropTy->getAsRecordDecl()); - if (PropDecl->getTemplateArgs().size() != 1) - return SemaSYCLRef.Diag(Loc, - diag::err_sycl_invalid_property_list_param_number) - << "buffer_location"; - - const auto BufferLoc = PropDecl->getTemplateArgs()[0]; - if (BufferLoc.getKind() != TemplateArgument::ArgKind::Integral) - return SemaSYCLRef.Diag( - Loc, - diag::err_sycl_invalid_accessor_property_list_template_param) - << /*buffer_location*/ 2 << /*non-negative integer*/ 2; - - int LocationID = static_cast(BufferLoc.getAsIntegral().getExtValue()); - if (LocationID < 0) - return SemaSYCLRef.Diag( - Loc, - diag::err_sycl_invalid_accessor_property_list_template_param) - << /*buffer_location*/ 2 << /*non-negative integer*/ 2; - - return false; - } - - bool checkSyclSpecialType(QualType Ty, SourceRange Loc) { - assert(isSyclSpecialType(Ty, SemaSYCLRef) && - "Should only be called on sycl special class types."); - - // Annotated pointers and annotated arguments must be captured - // directly by the SYCL kernel. - if ((SemaSYCL::isSyclType(Ty, SYCLTypeAttr::annotated_ptr) || - SemaSYCL::isSyclType(Ty, SYCLTypeAttr::annotated_arg)) && - (StructFieldDepth > 0 || StructBaseDepth > 0)) - return SemaSYCLRef.Diag(Loc.getBegin(), - diag::err_bad_kernel_param_data_members) - << Ty << /*Struct*/ 1; - - const RecordDecl *RecD = Ty->getAsRecordDecl(); - if (const ClassTemplateSpecializationDecl *CTSD = - dyn_cast(RecD)) { - const TemplateArgumentList &TAL = CTSD->getTemplateArgs(); - TemplateArgument TA = TAL.get(0); - - // Parameter packs are used by properties so they are always valid. 
- if (TA.getKind() != TemplateArgument::Pack) { - llvm::DenseSet Visited; - checkSYCLType(SemaSYCLRef, TA.getAsType(), Loc, Visited); - } - - if (TAL.size() > 5) - return checkPropertyListType(TAL.get(5), Loc.getBegin()); - } - return false; - } - -public: - SyclKernelFieldChecker(SemaSYCL &S) - : SyclKernelFieldHandler(S), Diag(S.getASTContext().getDiagnostics()) {} - static constexpr const bool VisitNthArrayElement = false; - bool isValid() { return !IsInvalid; } - - bool handleReferenceType(FieldDecl *FD, QualType FieldTy) final { - Diag.Report(FD->getLocation(), diag::err_bad_kernel_param_type) << FieldTy; - IsInvalid = true; - return isValid(); - } - - bool handleReferenceType(ParmVarDecl *PD, QualType ParamTy) final { - Diag.Report(PD->getLocation(), diag::err_bad_kernel_param_type) << ParamTy; - IsInvalid = true; - return isValid(); - } - - bool handleStructType(FieldDecl *FD, QualType FieldTy) final { - CXXRecordDecl *RD = FieldTy->getAsCXXRecordDecl(); - assert(RD && "Not a RecordDecl inside the handler for struct type"); - if (RD->isLambda()) { - for (const LambdaCapture &LC : RD->captures()) - if (LC.capturesThis() && LC.isImplicit()) { - SemaSYCLRef.Diag(LC.getLocation(), diag::err_implicit_this_capture); - IsInvalid = true; - } - } - return isValid(); - } - - bool handleStructType(ParmVarDecl *PD, QualType ParamTy) final { - CXXRecordDecl *RD = ParamTy->getAsCXXRecordDecl(); - // For free functions all struct/class kernel arguments are forward declared - // in integration header, that adds additional restrictions for kernel - // arguments. 
- NotForwardDeclarableReason NFDR = - isForwardDeclarable(RD, SemaSYCLRef, /*DiagForFreeFunction=*/true); - if (NFDR != NotForwardDeclarableReason::None) { - Diag.Report(PD->getLocation(), - diag::err_bad_kernel_param_type) - << ParamTy; - Diag.Report(PD->getLocation(), - diag::note_free_function_kernel_param_type_not_fwd_declarable) - << ParamTy; - IsInvalid = true; - } - return isValid(); - } - - bool handleSyclSpecialType(const CXXRecordDecl *, const CXXBaseSpecifier &BS, - QualType FieldTy) final { - IsInvalid |= checkSyclSpecialType(FieldTy, BS.getBeginLoc()); - return isValid(); - } - - bool handleSyclSpecialType(FieldDecl *FD, QualType FieldTy) final { - IsInvalid |= checkSyclSpecialType(FieldTy, FD->getLocation()); - return isValid(); - } - - bool handleSyclSpecialType(ParmVarDecl *PD, QualType ParamTy) final { - Diag.Report(PD->getLocation(), diag::err_bad_kernel_param_type) << ParamTy; - IsInvalid = true; - return isValid(); - } - - bool handleArrayType(FieldDecl *FD, QualType FieldTy) final { - IsInvalid |= checkNotCopyableToKernel(FD, FieldTy); - return isValid(); - } - - bool handleArrayType(ParmVarDecl *PD, QualType ParamTy) final { - Diag.Report(PD->getLocation(), diag::err_bad_kernel_param_type) << ParamTy; - IsInvalid = true; - return isValid(); - } - - bool handlePointerType(FieldDecl *FD, QualType FieldTy) final { - while (FieldTy->isAnyPointerType()) { - FieldTy = QualType{FieldTy->getPointeeOrArrayElementType(), 0}; - if (FieldTy->isVariableArrayType()) { - Diag.Report(FD->getLocation(), diag::err_vla_unsupported) << 0; - IsInvalid = true; - break; - } - } - return isValid(); - } - - bool handlePointerType(ParmVarDecl *PD, QualType ParamTy) final { - while (ParamTy->isAnyPointerType()) { - ParamTy = QualType{ParamTy->getPointeeOrArrayElementType(), 0}; - if (ParamTy->isVariableArrayType()) { - Diag.Report(PD->getLocation(), diag::err_vla_unsupported) << 0; - IsInvalid = true; - break; - } - } - return isValid(); - } - - bool 
handleOtherType(FieldDecl *FD, QualType FieldTy) final { - Diag.Report(FD->getLocation(), diag::err_bad_kernel_param_type) << FieldTy; - IsInvalid = true; - return isValid(); - } - - bool handleOtherType(ParmVarDecl *PD, QualType ParamTy) final { - Diag.Report(PD->getLocation(), diag::err_bad_kernel_param_type) << ParamTy; - IsInvalid = true; - return isValid(); - } - - bool enterStruct(const CXXRecordDecl *, FieldDecl *, QualType) final { - ++StructFieldDepth; - return true; - } - - bool leaveStruct(const CXXRecordDecl *, FieldDecl *, QualType) final { - --StructFieldDepth; - return true; - } - - bool enterStruct(const CXXRecordDecl *, ParmVarDecl *, QualType) final { - // TODO manipulate struct depth once special types are supported for free - // function kernels. - // ++StructFieldDepth; - return true; - } - - bool leaveStruct(const CXXRecordDecl *, ParmVarDecl *PD, - QualType ParamTy) final { - // TODO manipulate struct depth once special types are supported for free - // function kernels. - // --StructFieldDepth; - // TODO We don't yet support special types and therefore structs that - // require decomposition and leaving/entering. Diagnose for better user - // experience. - CXXRecordDecl *RD = ParamTy->getAsCXXRecordDecl(); - if (RD->hasAttr()) { - Diag.Report(PD->getLocation(), - diag::err_bad_kernel_param_type) - << ParamTy; - Diag.Report(PD->getLocation(), - diag::note_free_function_kernel_param_type_not_supported) - << ParamTy; - IsInvalid = true; - } - return isValid(); - } - - bool enterStruct(const CXXRecordDecl *, const CXXBaseSpecifier &BS, - QualType FieldTy) final { - ++StructBaseDepth; - return true; - } - - bool leaveStruct(const CXXRecordDecl *, const CXXBaseSpecifier &BS, - QualType FieldTy) final { - --StructBaseDepth; - return true; - } -}; - -// A type to check the validity of accessing accessor/sampler/stream -// types as kernel parameters inside union. 
-class SyclKernelUnionChecker : public SyclKernelFieldHandler { - int UnionCount = 0; - bool IsInvalid = false; - DiagnosticsEngine &Diag; - -public: - SyclKernelUnionChecker(SemaSYCL &S) - : SyclKernelFieldHandler(S), Diag(S.getASTContext().getDiagnostics()) {} - bool isValid() { return !IsInvalid; } - static constexpr const bool VisitUnionBody = true; - static constexpr const bool VisitNthArrayElement = false; - - bool checkType(SourceLocation Loc, QualType Ty) { - if (UnionCount) { - IsInvalid = true; - Diag.Report(Loc, diag::err_bad_kernel_param_data_members) - << Ty << /*Union*/ 0; - } - return isValid(); - } - - bool enterUnion(const CXXRecordDecl *RD, FieldDecl *FD) override { - ++UnionCount; - return true; - } - - bool enterUnion(const CXXRecordDecl *, ParmVarDecl *) override { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool leaveUnion(const CXXRecordDecl *RD, FieldDecl *FD) override { - --UnionCount; - return true; - } - - bool leaveUnion(const CXXRecordDecl *, ParmVarDecl *) override { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool handleSyclSpecialType(FieldDecl *FD, QualType FieldTy) final { - return checkType(FD->getLocation(), FieldTy); - } - - bool handleSyclSpecialType(ParmVarDecl *PD, QualType ParamTy) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool handleSyclSpecialType(const CXXRecordDecl *, const CXXBaseSpecifier &BS, - QualType FieldTy) final { - return checkType(BS.getBeginLoc(), FieldTy); - } -}; - -// A type to mark whether a collection requires decomposition -// or needs to be transformed to a new type. If a collection -// contains pointers, and is not decomposed, a new type must -// be generated with all pointers in global address space. 
-class SyclKernelDecompMarker : public SyclKernelFieldHandler { - llvm::SmallVector CollectionStack; - llvm::SmallVector PointerStack; - -public: - static constexpr const bool VisitUnionBody = false; - static constexpr const bool VisitNthArrayElement = false; - - SyclKernelDecompMarker(SemaSYCL &S) : SyclKernelFieldHandler(S) { - // Base entry. - CollectionStack.push_back(false); - PointerStack.push_back(false); - } - - bool handleSyclSpecialType(const CXXRecordDecl *, const CXXBaseSpecifier &, - QualType) final { - CollectionStack.back() = true; - return true; - } - bool handleSyclSpecialType(FieldDecl *, QualType) final { - CollectionStack.back() = true; - return true; - } - - bool handleSyclSpecialType(ParmVarDecl *, QualType) final { - // TODO We don't support special types in free function kernel parameters, - // but track them to diagnose the case properly. - CollectionStack.back() = true; - return true; - } - - bool handlePointerType(FieldDecl *, QualType) final { - PointerStack.back() = targetRequiresNewType(SemaSYCLRef.getASTContext()); - return true; - } - - bool handlePointerType(ParmVarDecl *, QualType) final { - PointerStack.back() = targetRequiresNewType(SemaSYCLRef.getASTContext()); - return true; - } - - // Add Top level information to ease checks for processor. 
- bool handleTopLevelStruct(const CXXRecordDecl *, QualType Ty) final { - CXXRecordDecl *RD = Ty->getAsCXXRecordDecl(); - assert(RD && "should not be null."); - if (CollectionStack.pop_back_val() || - SemaSYCLRef.getLangOpts().SYCLDecomposeStruct) { - if (!RD->hasAttr()) - RD->addAttr(SYCLRequiresDecompositionAttr::CreateImplicit( - SemaSYCLRef.getASTContext())); - PointerStack.pop_back(); - } else if (PointerStack.pop_back_val()) { - if (!RD->hasAttr()) - RD->addAttr(SYCLGenerateNewTypeAttr::CreateImplicit( - SemaSYCLRef.getASTContext())); - } - assert(CollectionStack.size() == 0); - assert(PointerStack.size() == 0); - return true; - } - - bool enterStruct(const CXXRecordDecl *, FieldDecl *, QualType) final { - CollectionStack.push_back(false); - PointerStack.push_back(false); - return true; - } - - bool enterStruct(const CXXRecordDecl *, ParmVarDecl *, QualType) final { - CollectionStack.push_back(false); - PointerStack.push_back(false); - return true; - } - - bool leaveStruct(const CXXRecordDecl *, FieldDecl *, QualType Ty) final { - // If a record needs to be decomposed, it is marked with - // SYCLRequiresDecompositionAttr. Else if a record contains - // a pointer, it is marked with SYCLGenerateNewTypeAttr. A record - // will never be marked with both attributes. 
- CXXRecordDecl *RD = Ty->getAsCXXRecordDecl(); - assert(RD && "should not be null."); - if (CollectionStack.pop_back_val()) { - if (!RD->hasAttr()) - RD->addAttr(SYCLRequiresDecompositionAttr::CreateImplicit( - SemaSYCLRef.getASTContext())); - CollectionStack.back() = true; - PointerStack.pop_back(); - } else if (PointerStack.pop_back_val()) { - PointerStack.back() = true; - if (!RD->hasAttr()) - RD->addAttr(SYCLGenerateNewTypeAttr::CreateImplicit( - SemaSYCLRef.getASTContext())); - } - return true; - } - - bool leaveStruct(const CXXRecordDecl *, ParmVarDecl *, - QualType ParamTy) final { - CXXRecordDecl *RD = ParamTy->getAsCXXRecordDecl(); - assert(RD && "should not be null."); - if (CollectionStack.pop_back_val()) { - if (!RD->hasAttr()) - RD->addAttr(SYCLRequiresDecompositionAttr::CreateImplicit( - SemaSYCLRef.getASTContext())); - CollectionStack.back() = true; - PointerStack.pop_back(); - } else if (PointerStack.pop_back_val()) { - PointerStack.back() = true; - if (!RD->hasAttr()) - RD->addAttr(SYCLGenerateNewTypeAttr::CreateImplicit( - SemaSYCLRef.getASTContext())); - } - return true; - } - - bool enterStruct(const CXXRecordDecl *, const CXXBaseSpecifier &, - QualType) final { - CollectionStack.push_back(false); - PointerStack.push_back(false); - return true; - } - - bool leaveStruct(const CXXRecordDecl *, const CXXBaseSpecifier &, - QualType Ty) final { - // If a record needs to be decomposed, it is marked with - // SYCLRequiresDecompositionAttr. Else if a record contains - // a pointer, it is marked with SYCLGenerateNewTypeAttr. A record - // will never be marked with both attributes. 
- CXXRecordDecl *RD = Ty->getAsCXXRecordDecl(); - assert(RD && "should not be null."); - if (CollectionStack.pop_back_val()) { - if (!RD->hasAttr()) - RD->addAttr(SYCLRequiresDecompositionAttr::CreateImplicit( - SemaSYCLRef.getASTContext())); - CollectionStack.back() = true; - PointerStack.pop_back(); - } else if (PointerStack.pop_back_val()) { - PointerStack.back() = true; - if (!RD->hasAttr()) - RD->addAttr(SYCLGenerateNewTypeAttr::CreateImplicit( - SemaSYCLRef.getASTContext())); - } - return true; - } - - bool enterArray(FieldDecl *, QualType ArrayTy, QualType ElementTy) final { - CollectionStack.push_back(false); - PointerStack.push_back(false); - return true; - } - - bool enterArray(ParmVarDecl *, QualType ArrayTy, QualType ElementTy) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool leaveArray(FieldDecl *FD, QualType ArrayTy, QualType ElementTy) final { - // If an array needs to be decomposed, it is marked with - // SYCLRequiresDecompositionAttr. Else if the array is an array of pointers - // or an array of structs containing pointers, it is marked with - // SYCLGenerateNewTypeAttr. An array will never be marked with both - // attributes. - if (CollectionStack.pop_back_val()) { - // Cannot assert, since in MD arrays we'll end up marking them multiple - // times. 
- if (!FD->hasAttr()) - FD->addAttr(SYCLRequiresDecompositionAttr::CreateImplicit( - SemaSYCLRef.getASTContext())); - CollectionStack.back() = true; - PointerStack.pop_back(); - } else if (PointerStack.pop_back_val()) { - if (!FD->hasAttr()) - FD->addAttr(SYCLGenerateNewTypeAttr::CreateImplicit( - SemaSYCLRef.getASTContext())); - PointerStack.back() = true; - } - return true; - } - - bool leaveArray(ParmVarDecl *PD, QualType ArrayTy, QualType ElementTy) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } -}; - -static QualType ModifyAddressSpace(SemaSYCL &SemaSYCLRef, QualType Ty) { - // USM allows to use raw pointers instead of buffers/accessors, but these - // pointers point to the specially allocated memory. For pointer fields, - // except for function pointer fields, we add a kernel argument with the - // same type as field but global address space, because OpenCL requires it. - // Function pointers should have program address space. This is set in - // CodeGen. - QualType PointeeTy = Ty->getPointeeType(); - Qualifiers Quals = PointeeTy.getQualifiers(); - LangAS AS = Quals.getAddressSpace(); - // Leave global_device and global_host address spaces as is to help FPGA - // device in memory allocations. - if (!PointeeTy->isFunctionType() && AS != LangAS::sycl_global_device && - AS != LangAS::sycl_global_host) - Quals.setAddressSpace(LangAS::sycl_global); - PointeeTy = SemaSYCLRef.getASTContext().getQualifiedType( - PointeeTy.getUnqualifiedType(), Quals); - return SemaSYCLRef.getASTContext().getPointerType(PointeeTy); -} - -// This visitor is used to traverse a non-decomposed record/array to -// generate a new type corresponding to this record/array. -class SyclKernelPointerHandler : public SyclKernelFieldHandler { - llvm::SmallVector ModifiedRecords; - SmallVector ModifiedBases; - SmallVector ModifiedArrayElementsOrArray; - - IdentifierInfo *getModifiedName(IdentifierInfo *Id) { - std::string Name = - Id ? 
(Twine("__generated_") + Id->getName()).str() : "__generated_"; - return &SemaSYCLRef.getASTContext().Idents.get(Name); - } - - // Create Decl for the new type we are generating. - // The fields (and base classes) of this record will be generated as - // the visitor traverses kernel object record fields. - void createNewType(const CXXRecordDecl *RD) { - auto *ModifiedRD = CXXRecordDecl::Create( - SemaSYCLRef.getASTContext(), RD->getTagKind(), - const_cast(RD->getDeclContext()), SourceLocation(), - SourceLocation(), getModifiedName(RD->getIdentifier())); - ModifiedRD->startDefinition(); - if (RD->hasAttrs()) - ModifiedRD->setAttrs(RD->getAttrs()); - ModifiedRecords.push_back(ModifiedRD); - } - - // Create and add FieldDecl for FieldTy to generated record. - void addField(const FieldDecl *FD, QualType FieldTy) { - assert(!ModifiedRecords.empty() && - "ModifiedRecords should have at least 1 record"); - ASTContext &Ctx = SemaSYCLRef.getASTContext(); - auto *Field = FieldDecl::Create( - Ctx, ModifiedRecords.back(), SourceLocation(), SourceLocation(), - getModifiedName(FD->getIdentifier()), FieldTy, - Ctx.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), /*BW=*/nullptr, - /*Mutable=*/false, ICIS_NoInit); - Field->setAccess(FD->getAccess()); - if (FD->hasAttrs()) - Field->setAttrs(FD->getAttrs()); - // Add generated field to generated record. 
- ModifiedRecords.back()->addDecl(Field); - } - - void createBaseSpecifier(const CXXRecordDecl *Parent, const CXXRecordDecl *RD, - const CXXBaseSpecifier &BS) { - TypeSourceInfo *TInfo = - SemaSYCLRef.getASTContext().getTrivialTypeSourceInfo( - QualType(RD->getTypeForDecl(), 0), SourceLocation()); - CXXBaseSpecifier *ModifiedBase = SemaSYCLRef.SemaRef.CheckBaseSpecifier( - const_cast(Parent), SourceRange(), BS.isVirtual(), - BS.getAccessSpecifier(), TInfo, SourceLocation()); - ModifiedBases.push_back(ModifiedBase); - } - - CXXRecordDecl *getGeneratedNewRecord(const CXXRecordDecl *OldBaseDecl) { - // At this point we have finished generating fields for the new - // class corresponding to OldBaseDecl. Pop out the generated - // record. - CXXRecordDecl *ModifiedRD = ModifiedRecords.pop_back_val(); - ModifiedRD->completeDefinition(); - // Check the 'old' class for base classes. - // Set bases classes for newly generated class if it has any. - if (OldBaseDecl->getNumBases() > 0) { - SmallVector BasesForGeneratedClass; - for (size_t I = 0; I < OldBaseDecl->getNumBases(); ++I) - BasesForGeneratedClass.insert(BasesForGeneratedClass.begin(), - ModifiedBases.pop_back_val()); - ModifiedRD->setBases(BasesForGeneratedClass.data(), - OldBaseDecl->getNumBases()); - } - return ModifiedRD; - } - -public: - static constexpr const bool VisitInsideSimpleContainersWithPointer = true; - static constexpr const bool VisitNthArrayElement = false; - SyclKernelPointerHandler(SemaSYCL &S, const CXXRecordDecl *RD) - : SyclKernelFieldHandler(S) { - createNewType(RD); - } - - SyclKernelPointerHandler(SemaSYCL &S) : SyclKernelFieldHandler(S) {} - - bool enterStruct(const CXXRecordDecl *, FieldDecl *, QualType Ty) final { - createNewType(Ty->getAsCXXRecordDecl()); - return true; - } - - bool enterStruct(const CXXRecordDecl *, ParmVarDecl *, - QualType ParamTy) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool leaveStruct(const CXXRecordDecl *, FieldDecl *FD, 
QualType Ty) final { - CXXRecordDecl *ModifiedRD = getGeneratedNewRecord(Ty->getAsCXXRecordDecl()); - - // Add this record as a field of it's parent record if it is not an - // array element. - if (!isArrayElement(FD, Ty)) - addField(FD, QualType(ModifiedRD->getTypeForDecl(), 0)); - else - ModifiedArrayElementsOrArray.push_back( - QualType(ModifiedRD->getTypeForDecl(), 0)); - - return true; - } - - bool leaveStruct(const CXXRecordDecl *, ParmVarDecl *PD, - QualType ParamTy) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool enterStruct(const CXXRecordDecl *, const CXXBaseSpecifier &, - QualType Ty) final { - createNewType(Ty->getAsCXXRecordDecl()); - return true; - } - - bool leaveStruct(const CXXRecordDecl *Parent, const CXXBaseSpecifier &BS, - QualType Ty) final { - CXXRecordDecl *ModifiedRD = getGeneratedNewRecord(Ty->getAsCXXRecordDecl()); - - // Create CXXBaseSpecifier for this generated class. - createBaseSpecifier(Parent, ModifiedRD, BS); - return true; - } - - bool leaveArray(FieldDecl *FD, QualType ArrayTy, QualType ET) final { - QualType ModifiedArrayElement = ModifiedArrayElementsOrArray.pop_back_val(); - - const ConstantArrayType *CAT = - SemaSYCLRef.getASTContext().getAsConstantArrayType(ArrayTy); - assert(CAT && "Should only be called on constant-size array."); - QualType ModifiedArray = SemaSYCLRef.getASTContext().getConstantArrayType( - ModifiedArrayElement, CAT->getSize(), - const_cast(CAT->getSizeExpr()), CAT->getSizeModifier(), - CAT->getIndexTypeCVRQualifiers()); - - if (ModifiedRecords.empty()) { - // This is a top-level kernel argument. - ModifiedArrayElementsOrArray.push_back(ModifiedArray); - } else if (!isArrayElement(FD, ArrayTy)) { - // Add this array field as a field of it's parent record. - addField(FD, ModifiedArray); - } else { - // Multi-dimensional array element. 
- ModifiedArrayElementsOrArray.push_back(ModifiedArray); - } - - return true; - } - - bool leaveArray(ParmVarDecl *PD, QualType ArrayTy, QualType ET) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool handlePointerType(FieldDecl *FD, QualType FieldTy) final { - QualType ModifiedPointerType = ModifyAddressSpace(SemaSYCLRef, FieldTy); - if (!isArrayElement(FD, FieldTy)) - addField(FD, ModifiedPointerType); - else - ModifiedArrayElementsOrArray.push_back(ModifiedPointerType); - // We do not need to wrap pointers since this is a pointer inside - // non-decomposed struct. - return true; - } - - bool handlePointerType(ParmVarDecl *PD, QualType ParamTy) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool handleScalarType(FieldDecl *FD, QualType FieldTy) final { - addField(FD, FieldTy); - return true; - } - - bool handleScalarType(ParmVarDecl *PD, QualType ParamTy) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool handleUnionType(FieldDecl *FD, QualType FieldTy) final { - return handleScalarType(FD, FieldTy); - } - - bool handleUnionType(ParmVarDecl *PD, QualType ParamTy) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool handleNonDecompStruct(const CXXRecordDecl *, FieldDecl *FD, - QualType Ty) final { - addField(FD, Ty); - return true; - } - - bool handleNonDecompStruct(const CXXRecordDecl *, ParmVarDecl *PD, - QualType ParamTy) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool handleNonDecompStruct(const CXXRecordDecl *Parent, - const CXXBaseSpecifier &BS, QualType Ty) final { - createBaseSpecifier(Parent, Ty->getAsCXXRecordDecl(), BS); - return true; - } - - bool handleSimpleArrayType(FieldDecl *FD, QualType Ty) final { - addField(FD, Ty); - return true; - } - -public: - QualType getNewRecordType() { - CXXRecordDecl *ModifiedRD = ModifiedRecords.pop_back_val(); - ModifiedRD->completeDefinition(); - - if 
(!ModifiedBases.empty()) - ModifiedRD->setBases(ModifiedBases.data(), ModifiedBases.size()); - - return QualType(ModifiedRD->getTypeForDecl(), 0); - } - QualType getNewArrayType() { - return ModifiedArrayElementsOrArray.pop_back_val(); - } -}; - -// A type to Create and own the FunctionDecl for the kernel. -class SyclKernelDeclCreator : public SyclKernelFieldHandler { - FunctionDecl *KernelDecl = nullptr; - llvm::SmallVector Params; - Sema::ContextRAII FuncContext; - // Holds the last handled field's first parameter. This doesn't store an - // iterator as push_back invalidates iterators. - size_t LastParamIndex = 0; - // Keeps track of whether we are currently handling fields inside a struct. - int StructDepth = 0; - - void addParam(const FieldDecl *FD, QualType FieldTy) { - ParamDesc newParamDesc = makeParamDesc(FD, FieldTy); - addParam(newParamDesc, FieldTy); - } - - void addParam(const ParmVarDecl *PD, QualType ParamTy) { - ParamDesc newParamDesc = makeParamDesc(PD, ParamTy); - addParam(newParamDesc, ParamTy); - } - - void addParam(const CXXBaseSpecifier &BS, QualType FieldTy) { - // TODO: There is no name for the base available, but duplicate names are - // seemingly already possible, so we'll give them all the same name for now. - // This only happens with the accessor types. - StringRef Name = "_arg__base"; - ParamDesc newParamDesc = - makeParamDesc(SemaSYCLRef.getASTContext(), Name, FieldTy); - addParam(newParamDesc, FieldTy); - } - // Add a parameter with specified name and type - void addParam(StringRef Name, QualType ParamTy) { - ParamDesc newParamDesc = - makeParamDesc(SemaSYCLRef.getASTContext(), Name, ParamTy); - addParam(newParamDesc, ParamTy); - } - - void addParam(ParamDesc newParamDesc, QualType FieldTy) { - // Create a new ParmVarDecl based on the new info. 
- ASTContext &Ctx = SemaSYCLRef.getASTContext(); - auto *NewParam = ParmVarDecl::Create( - Ctx, KernelDecl, SourceLocation(), SourceLocation(), - std::get<1>(newParamDesc), std::get<0>(newParamDesc), - std::get<2>(newParamDesc), SC_None, /*DefArg*/ nullptr); - NewParam->setScopeInfo(0, Params.size()); - NewParam->setIsUsed(); - - LastParamIndex = Params.size(); - Params.push_back(NewParam); - } - - // Handle accessor properties. If any properties were found in - // the accessor_property_list - add the appropriate attributes to ParmVarDecl. - void handleAccessorPropertyList(ParmVarDecl *Param, - const CXXRecordDecl *RecordDecl, - SourceLocation Loc) { - const auto *AccTy = cast(RecordDecl); - if (AccTy->getTemplateArgs().size() < 6) - return; - const auto PropList = cast(AccTy->getTemplateArgs()[5]); - QualType PropListTy = PropList.getAsType(); - const auto *AccPropListDecl = - cast(PropListTy->getAsRecordDecl()); - const auto TemplArg = AccPropListDecl->getTemplateArgs()[0]; - // Move through TemplateArgs list of a property list and search for - // properties. If found - apply the appropriate attribute to ParmVarDecl. 
- for (TemplateArgument::pack_iterator Prop = TemplArg.pack_begin(); - Prop != TemplArg.pack_end(); ++Prop) { - QualType PropTy = Prop->getAsType(); - if (isAccessorPropertyType(PropTy, SYCLTypeAttr::no_alias)) - handleNoAliasProperty(Param, PropTy, Loc); - if (isAccessorPropertyType(PropTy, SYCLTypeAttr::buffer_location)) - handleBufferLocationProperty(Param, PropTy, Loc); - } - } - - void handleNoAliasProperty(ParmVarDecl *Param, QualType PropTy, - SourceLocation Loc) { - ASTContext &Ctx = SemaSYCLRef.getASTContext(); - Param->addAttr(RestrictAttr::CreateImplicit(Ctx, Loc)); - } - - // Obtain an integer value stored in a template parameter of buffer_location - // property to pass it to buffer_location kernel attribute - void handleBufferLocationProperty(ParmVarDecl *Param, QualType PropTy, - SourceLocation Loc) { - // If we have more than 1 buffer_location properties on a single - // accessor - emit an error - if (Param->hasAttr()) { - SemaSYCLRef.Diag(Loc, diag::err_sycl_compiletime_property_duplication) - << "buffer_location"; - return; - } - ASTContext &Ctx = SemaSYCLRef.getASTContext(); - const auto *PropDecl = - cast(PropTy->getAsRecordDecl()); - const auto BufferLoc = PropDecl->getTemplateArgs()[0]; - int LocationID = static_cast(BufferLoc.getAsIntegral().getExtValue()); - Param->addAttr( - SYCLIntelBufferLocationAttr::CreateImplicit(Ctx, LocationID)); - } - - // Additional processing is required for accessor type. - void handleAccessorType(QualType FieldTy, const CXXRecordDecl *RecordDecl, - SourceLocation Loc) { - handleAccessorPropertyList(Params.back(), RecordDecl, Loc); - - // If "accessor" type check if read only - if (SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::accessor)) { - // Get access mode of accessor. 
- const auto *AccessorSpecializationDecl = - cast(RecordDecl); - const TemplateArgument &AccessModeArg = - AccessorSpecializationDecl->getTemplateArgs().get(2); - if (isReadOnlyAccessor(AccessModeArg)) - Params.back()->addAttr(SYCLAccessorReadonlyAttr::CreateImplicit( - SemaSYCLRef.getASTContext())); - } - - // Add implicit attribute to parameter decl when it is a read only - // SYCL accessor. - Params.back()->addAttr( - SYCLAccessorPtrAttr::CreateImplicit(SemaSYCLRef.getASTContext())); - } - - // All special SYCL objects must have __init method. We extract types for - // kernel parameters from __init method parameters. We will use __init method - // and kernel parameters which we build here to initialize special objects in - // the kernel body. - bool handleSpecialType(FieldDecl *FD, QualType FieldTy) { - const auto *RecordDecl = FieldTy->getAsCXXRecordDecl(); - assert(RecordDecl && "The type must be a RecordDecl"); - llvm::StringLiteral MethodName = - KernelDecl->hasAttr() && isSyclAccessorType(FieldTy) - ? InitESIMDMethodName - : InitMethodName; - CXXMethodDecl *InitMethod = getMethodByName(RecordDecl, MethodName); - assert(InitMethod && "The type must have the __init method"); - - // Don't do -1 here because we count on this to be the first parameter added - // (if any). - size_t ParamIndex = Params.size(); - for (const ParmVarDecl *Param : InitMethod->parameters()) { - QualType ParamTy = Param->getType(); - addParam(FD, ParamTy.getCanonicalType()); - - // Propagate add_ir_attributes_kernel_parameter attribute. - if (const auto *AddIRAttr = - Param->getAttr()) - Params.back()->addAttr(AddIRAttr->clone(SemaSYCLRef.getASTContext())); - - // FIXME: This code is temporary, and will be removed once __init_esimd - // is removed and property list refactored. - // The function handleAccessorType includes a call to - // handleAccessorPropertyList. 
If new classes with property list are - // added, this code needs to be refactored to call - // handleAccessorPropertyList for each class which requires it. - if (ParamTy.getTypePtr()->isPointerType() && isSyclAccessorType(FieldTy)) - handleAccessorType(FieldTy, RecordDecl, FD->getBeginLoc()); - } - LastParamIndex = ParamIndex; - return true; - } - - static void setKernelImplicitAttrs(ASTContext &Context, FunctionDecl *FD, - bool IsSIMDKernel) { - // Set implicit attributes. - FD->addAttr(OpenCLKernelAttr::CreateImplicit(Context)); - FD->addAttr(ArtificialAttr::CreateImplicit(Context)); - if (IsSIMDKernel) - FD->addAttr(SYCLSimdAttr::CreateImplicit(Context)); - } - - static FunctionDecl *createKernelDecl(ASTContext &Ctx, SourceLocation Loc, - bool IsInline, bool IsSIMDKernel) { - // Create this with no prototype, and we can fix this up after we've seen - // all the params. - FunctionProtoType::ExtProtoInfo Info(CC_OpenCLKernel); - QualType FuncType = Ctx.getFunctionType(Ctx.VoidTy, {}, Info); - - FunctionDecl *FD = FunctionDecl::Create( - Ctx, Ctx.getTranslationUnitDecl(), Loc, Loc, DeclarationName(), - FuncType, Ctx.getTrivialTypeSourceInfo(Ctx.VoidTy), SC_None); - FD->setImplicitlyInline(IsInline); - setKernelImplicitAttrs(Ctx, FD, IsSIMDKernel); - - // Add kernel to translation unit to see it in AST-dump. - Ctx.getTranslationUnitDecl()->addDecl(FD); - return FD; - } - - // If the record has been marked with SYCLGenerateNewTypeAttr, - // it implies that it contains a pointer within. This function - // defines a PointerHandler visitor which visits this record - // recursively and modifies the address spaces of any pointer - // found as required, thereby generating a new record with all - // pointers in 'right' address space. PointerHandler.getNewRecordType() - // returns this generated type. 
- QualType GenerateNewRecordType(const CXXRecordDecl *RD) { - SyclKernelPointerHandler PointerHandler(SemaSYCLRef, RD); - KernelObjVisitor Visitor{SemaSYCLRef}; - Visitor.VisitRecordBases(RD, PointerHandler); - Visitor.VisitRecordFields(RD, PointerHandler); - return PointerHandler.getNewRecordType(); - } - - // If the array has been marked with SYCLGenerateNewTypeAttr, - // it implies that this is an array of pointers, or an array - // of a type which contains pointers. This function generates - // a new array with all pointers in the required address space. - QualType GenerateNewArrayType(FieldDecl *FD, QualType FieldTy) { - const auto *Owner = dyn_cast(FD->getParent()); - SyclKernelPointerHandler PointerHandler(SemaSYCLRef); - KernelObjVisitor Visitor{SemaSYCLRef}; - Visitor.visitArray(Owner, FD, FieldTy, PointerHandler); - return PointerHandler.getNewArrayType(); - } - -public: - static constexpr const bool VisitInsideSimpleContainers = false; - SyclKernelDeclCreator(SemaSYCL &S, SourceLocation Loc, bool IsInline, - bool IsSIMDKernel, FunctionDecl *SYCLKernel) - : SyclKernelFieldHandler(S), - KernelDecl( - createKernelDecl(S.getASTContext(), Loc, IsInline, IsSIMDKernel)), - FuncContext(SemaSYCLRef.SemaRef, KernelDecl) { - S.addSyclOpenCLKernel(SYCLKernel, KernelDecl); - for (const auto *IRAttr : - SYCLKernel->specific_attrs()) { - KernelDecl->addAttr(IRAttr->clone(SemaSYCLRef.getASTContext())); - } - } - - ~SyclKernelDeclCreator() { - ASTContext &Ctx = SemaSYCLRef.getASTContext(); - FunctionProtoType::ExtProtoInfo Info(CC_OpenCLKernel); - - SmallVector ArgTys; - std::transform(std::begin(Params), std::end(Params), - std::back_inserter(ArgTys), - [](const ParmVarDecl *PVD) { return PVD->getType(); }); - - QualType FuncType = Ctx.getFunctionType(Ctx.VoidTy, ArgTys, Info); - KernelDecl->setType(FuncType); - KernelDecl->setParams(Params); - - // Make sure that this is marked as a kernel so that the code-gen can make - // decisions based on that. 
We cannot add this earlier, otherwise the call - // to TransformStmt in replaceWithLocalClone can diagnose something that got - // diagnosed on the actual kernel. - KernelDecl->addAttr( - SYCLKernelAttr::CreateImplicit(SemaSYCLRef.getASTContext())); - - SemaSYCLRef.addSyclDeviceDecl(KernelDecl); - } - - bool enterStruct(const CXXRecordDecl *, FieldDecl *, QualType) final { - ++StructDepth; - return true; - } - - bool enterStruct(const CXXRecordDecl *, ParmVarDecl *, QualType) final { - // TODO - // ++StructDepth; - return true; - } - - bool leaveStruct(const CXXRecordDecl *, FieldDecl *, QualType) final { - --StructDepth; - return true; - } - - bool leaveStruct(const CXXRecordDecl *, ParmVarDecl *, QualType) final { - // TODO - // --StructDepth; - return true; - } - - bool enterStruct(const CXXRecordDecl *, const CXXBaseSpecifier &BS, - QualType FieldTy) final { - ++StructDepth; - return true; - } - - bool leaveStruct(const CXXRecordDecl *, const CXXBaseSpecifier &BS, - QualType FieldTy) final { - --StructDepth; - return true; - } - - bool handleSyclSpecialType(const CXXRecordDecl *, const CXXBaseSpecifier &BS, - QualType FieldTy) final { - const auto *RecordDecl = FieldTy->getAsCXXRecordDecl(); - assert(RecordDecl && "The type must be a RecordDecl"); - llvm::StringLiteral MethodName = - KernelDecl->hasAttr() && isSyclAccessorType(FieldTy) - ? InitESIMDMethodName - : InitMethodName; - CXXMethodDecl *InitMethod = getMethodByName(RecordDecl, MethodName); - assert(InitMethod && "The type must have the __init method"); - - // Don't do -1 here because we count on this to be the first parameter added - // (if any). - size_t ParamIndex = Params.size(); - for (const ParmVarDecl *Param : InitMethod->parameters()) { - QualType ParamTy = Param->getType(); - addParam(BS, ParamTy.getCanonicalType()); - // FIXME: This code is temporary, and will be removed once __init_esimd - // is removed and property list refactored. 
- // The function handleAccessorType includes a call to - // handleAccessorPropertyList. If new classes with property list are - // added, this code needs to be refactored to call - // handleAccessorPropertyList for each class which requires it. - if (ParamTy.getTypePtr()->isPointerType() && isSyclAccessorType(FieldTy)) - handleAccessorType(FieldTy, RecordDecl, BS.getBeginLoc()); - } - LastParamIndex = ParamIndex; - return true; - } - - bool handleSyclSpecialType(FieldDecl *FD, QualType FieldTy) final { - return handleSpecialType(FD, FieldTy); - } - - bool handleSyclSpecialType(ParmVarDecl *, QualType) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - RecordDecl *wrapField(FieldDecl *Field, QualType FieldTy) { - RecordDecl *WrapperClass = - SemaSYCLRef.getASTContext().buildImplicitRecord("__wrapper_class"); - WrapperClass->startDefinition(); - Field = FieldDecl::Create( - SemaSYCLRef.getASTContext(), WrapperClass, SourceLocation(), - SourceLocation(), /*Id=*/nullptr, FieldTy, - SemaSYCLRef.getASTContext().getTrivialTypeSourceInfo(FieldTy, - SourceLocation()), - /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); - Field->setAccess(AS_public); - WrapperClass->addDecl(Field); - WrapperClass->completeDefinition(); - return WrapperClass; - }; - - bool handlePointerType(FieldDecl *FD, QualType FieldTy) final { - QualType ModTy = ModifyAddressSpace(SemaSYCLRef, FieldTy); - // When the kernel is generated, struct type kernel arguments are - // decomposed; i.e. the parameters of the kernel are the fields of the - // struct, and not the struct itself. This causes an error in the backend - // when the struct field is a pointer, since non-USM pointers cannot be - // passed directly. To work around this issue, all pointers inside the - // struct are wrapped in a generated '__wrapper_class'. 
- if (StructDepth) { - RecordDecl *WrappedPointer = wrapField(FD, ModTy); - ModTy = SemaSYCLRef.getASTContext().getRecordType(WrappedPointer); - } - - addParam(FD, ModTy); - return true; - } - - bool handlePointerType(ParmVarDecl *PD, QualType ParamTy) final { - QualType ModTy = ModifyAddressSpace(SemaSYCLRef, ParamTy); - addParam(PD, ModTy); - return true; - } - - bool handleSimpleArrayType(FieldDecl *FD, QualType FieldTy) final { - QualType ArrayTy = FieldTy; - - // This is an array of pointers or an array of a type with pointer. - if (FD->hasAttr()) - ArrayTy = GenerateNewArrayType(FD, FieldTy); - - // Arrays are wrapped in a struct since they cannot be passed directly. - RecordDecl *WrappedArray = wrapField(FD, ArrayTy); - addParam(FD, SemaSYCLRef.getASTContext().getRecordType(WrappedArray)); - return true; - } - - bool handleScalarType(FieldDecl *FD, QualType FieldTy) final { - addParam(FD, FieldTy); - return true; - } - - bool handleScalarType(ParmVarDecl *PD, QualType ParamTy) final { - addParam(PD, ParamTy); - return true; - } - - bool handleTopLevelStruct(const CXXRecordDecl *, QualType Ty) final { - StringRef Name = "_arg__sycl_functor"; - addParam(Name, Ty); - return true; - } - - bool handleNonDecompStruct(const CXXRecordDecl *RD, FieldDecl *FD, - QualType Ty) final { - // This is a field which should not be decomposed. - CXXRecordDecl *FieldRecordDecl = Ty->getAsCXXRecordDecl(); - assert(FieldRecordDecl && "Type must be a C++ record type"); - // Check if we need to generate a new type for this record, - // i.e. this record contains pointers. - if (FieldRecordDecl->hasAttr()) - addParam(FD, GenerateNewRecordType(FieldRecordDecl)); - else - addParam(FD, Ty); - return true; - } - - bool handleNonDecompStruct(const CXXRecordDecl *RD, ParmVarDecl *PD, - QualType ParamTy) final { - // This is a struct parameter which should not be decomposed. 
- CXXRecordDecl *ParamRecordDecl = ParamTy->getAsCXXRecordDecl(); - assert(ParamRecordDecl && "Type must be a C++ record type"); - // Check if we need to generate a new type for this record, - // i.e. this record contains pointers. - if (ParamRecordDecl->hasAttr()) - addParam(PD, GenerateNewRecordType(ParamRecordDecl)); - else - addParam(PD, ParamTy); - return true; - } - - bool handleNonDecompStruct(const CXXRecordDecl *Base, - const CXXBaseSpecifier &BS, QualType Ty) final { - // This is a base class which should not be decomposed. - CXXRecordDecl *BaseRecordDecl = Ty->getAsCXXRecordDecl(); - assert(BaseRecordDecl && "Type must be a C++ record type"); - // Check if we need to generate a new type for this record, - // i.e. this record contains pointers. - if (BaseRecordDecl->hasAttr()) - addParam(BS, GenerateNewRecordType(BaseRecordDecl)); - else - addParam(BS, Ty); - return true; - } - - bool handleUnionType(FieldDecl *FD, QualType FieldTy) final { - return handleScalarType(FD, FieldTy); - } - - bool handleUnionType(ParmVarDecl *PD, QualType ParamTy) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - // Generate kernel argument to initialize specialization constants. - void handleSyclKernelHandlerType() { - ASTContext &Context = SemaSYCLRef.getASTContext(); - StringRef Name = "_arg__specialization_constants_buffer"; - addParam(Name, Context.getPointerType(Context.getAddrSpaceQualType( - Context.CharTy, LangAS::sycl_global))); - } - - void setBody(CompoundStmt *KB) { KernelDecl->setBody(KB); } - - FunctionDecl *getKernelDecl() { return KernelDecl; } - - llvm::ArrayRef getParamVarDeclsForCurrentField() { - return ArrayRef(std::begin(Params) + LastParamIndex, - std::end(Params)); - } -}; - -// This Visitor traverses the AST of the function with -// `sycl_kernel` attribute and returns the version of “operator()()” that is -// called by KernelFunc. 
There will only be one call to KernelFunc in that -// AST because the DPC++ headers are structured such that the user’s -// kernel function is only called once. This ensures that the correct -// “operator()()” function call is returned, when a named function object used -// to define a kernel has more than one “operator()()” calls defined in it. For -// example, in the code below, 'operator()(sycl::id<1> id)' is returned based on -// the 'parallel_for' invocation which takes a 'sycl::range<1>(16)' argument. -// class MyKernel { -// public: -// void operator()() const { -// // code -// } -// -// [[intel::reqd_sub_group_size(4)]] void operator()(sycl::id<1> id) const -// { -// // code -// } -// }; -// -// int main() { -// -// Q.submit([&](sycl::handler& cgh) { -// MyKernel kernelFunctorObject; -// cgh.parallel_for(sycl::range<1>(16), kernelFunctorObject); -// }); -// return 0; -// } - -class KernelCallOperatorVisitor - : public RecursiveASTVisitor { - - FunctionDecl *KernelCallerFunc; - -public: - CXXMethodDecl *CallOperator = nullptr; - const CXXRecordDecl *KernelObj; - - KernelCallOperatorVisitor(FunctionDecl *KernelCallerFunc, - const CXXRecordDecl *KernelObj) - : KernelCallerFunc(KernelCallerFunc), KernelObj(KernelObj) {} - - bool VisitCallExpr(CallExpr *CE) { - Decl *CalleeDecl = CE->getCalleeDecl(); - if (isa_and_nonnull(CalleeDecl)) { - CXXMethodDecl *MD = cast(CalleeDecl); - if (MD->getOverloadedOperator() == OO_Call && - MD->getParent() == KernelObj) { - CallOperator = MD; - } - } - return true; - } - - CXXMethodDecl *getCallOperator() { - if (CallOperator) - return CallOperator; - - TraverseDecl(KernelCallerFunc); - return CallOperator; - } -}; - -class ESIMDKernelDiagnostics : public SyclKernelFieldHandler { - - SourceLocation KernelLoc; - bool IsESIMD = false; - - bool handleSpecialType(QualType FieldTy) { - const CXXRecordDecl *RecordDecl = FieldTy->getAsCXXRecordDecl(); - - if (IsESIMD && !isSyclAccessorType(FieldTy)) - return 
SemaSYCLRef.Diag(KernelLoc, - diag::err_sycl_esimd_not_supported_for_type) - << RecordDecl; - return true; - } - -public: - ESIMDKernelDiagnostics(SemaSYCL &S, SourceLocation Loc, bool IsESIMD) - : SyclKernelFieldHandler(S), KernelLoc(Loc), IsESIMD(IsESIMD) {} - - bool handleSyclSpecialType(FieldDecl *FD, QualType FieldTy) final { - return handleSpecialType(FieldTy); - } - - bool handleSyclSpecialType(const CXXRecordDecl *, const CXXBaseSpecifier &BS, - QualType FieldTy) final { - return handleSpecialType(FieldTy); - } - - using SyclKernelFieldHandler::handleSyclSpecialType; -}; - -class SyclKernelArgsSizeChecker : public SyclKernelFieldHandler { - SourceLocation KernelLoc; - unsigned SizeOfParams = 0; - bool IsESIMD = false; - - void addParam(QualType ArgTy) { - SizeOfParams += - SemaSYCLRef.getASTContext().getTypeSizeInChars(ArgTy).getQuantity(); - } - - bool handleSpecialType(QualType FieldTy) { - const CXXRecordDecl *RecordDecl = FieldTy->getAsCXXRecordDecl(); - assert(RecordDecl && "The type must be a RecordDecl"); - llvm::StringLiteral MethodName = (IsESIMD && isSyclAccessorType(FieldTy)) - ? 
InitESIMDMethodName - : InitMethodName; - CXXMethodDecl *InitMethod = getMethodByName(RecordDecl, MethodName); - assert(InitMethod && "The type must have the __init method"); - for (const ParmVarDecl *Param : InitMethod->parameters()) - addParam(Param->getType()); - return true; - } - -public: - static constexpr const bool VisitInsideSimpleContainers = false; - SyclKernelArgsSizeChecker(SemaSYCL &S, SourceLocation Loc, bool IsESIMD) - : SyclKernelFieldHandler(S), KernelLoc(Loc), IsESIMD(IsESIMD) {} - - ~SyclKernelArgsSizeChecker() { - if (SizeOfParams > MaxKernelArgsSize) - SemaSYCLRef.Diag(KernelLoc, diag::warn_sycl_kernel_too_big_args) - << SizeOfParams << MaxKernelArgsSize; - } - - bool handleSyclSpecialType(FieldDecl *FD, QualType FieldTy) final { - return handleSpecialType(FieldTy); - } - - bool handleSyclSpecialType(ParmVarDecl *PD, QualType ParamTy) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool handleSyclSpecialType(const CXXRecordDecl *, const CXXBaseSpecifier &BS, - QualType FieldTy) final { - return handleSpecialType(FieldTy); - } - - bool handlePointerType(FieldDecl *FD, QualType FieldTy) final { - addParam(FieldTy); - return true; - } - - bool handlePointerType(ParmVarDecl *PD, QualType ParamTy) final { - addParam(ParamTy); - return true; - } - - bool handleScalarType(FieldDecl *FD, QualType FieldTy) final { - addParam(FieldTy); - return true; - } - - bool handleScalarType(ParmVarDecl *PD, QualType ParamTy) final { - addParam(ParamTy); - return true; - } - - bool handleSimpleArrayType(FieldDecl *FD, QualType FieldTy) final { - addParam(FieldTy); - return true; - } - - bool handleTopLevelStruct(const CXXRecordDecl *, QualType Ty) final { - addParam(Ty); - return true; - } - - bool handleNonDecompStruct(const CXXRecordDecl *, FieldDecl *FD, - QualType Ty) final { - addParam(Ty); - return true; - } - - bool handleNonDecompStruct(const CXXRecordDecl *, ParmVarDecl *, - QualType ParamTy) final { - addParam(ParamTy); - 
return true; - } - - bool handleNonDecompStruct(const CXXRecordDecl *Base, - const CXXBaseSpecifier &BS, QualType Ty) final { - addParam(Ty); - return true; - } - - bool handleUnionType(FieldDecl *FD, QualType FieldTy) final { - return handleScalarType(FD, FieldTy); - } - - bool handleUnionType(ParmVarDecl *PD, QualType ParamTy) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } -}; - -std::string getKernelArgDesc(StringRef KernelArgDescription) { - if (KernelArgDescription == "") - return ""; - return ("Compiler generated argument for " + KernelArgDescription + ",") - .str(); -} - -class SyclOptReportCreator : public SyclKernelFieldHandler { - SyclKernelDeclCreator &DC; - SourceLocation KernelInvocationLoc; - - void addParam(const FieldDecl *KernelArg, QualType KernelArgType, - StringRef KernelArgDescription, - bool IsCompilerGeneratedType = false) { - StringRef NameToEmitInDescription = KernelArg->getName(); - const RecordDecl *KernelArgParent = KernelArg->getParent(); - if (KernelArgParent && KernelArgDescription == "decomposed struct/class") - NameToEmitInDescription = KernelArgParent->getName(); - - unsigned KernelArgSize = SemaSYCLRef.getASTContext() - .getTypeSizeInChars(KernelArgType) - .getQuantity(); - - SemaSYCLRef.getDiagnostics().getSYCLOptReport().AddKernelArgs( - DC.getKernelDecl(), NameToEmitInDescription, - IsCompilerGeneratedType ? "Compiler generated" - : KernelArgType.getAsString(), - KernelInvocationLoc, KernelArgSize, - getKernelArgDesc(KernelArgDescription), - (KernelArgDescription == "decomposed struct/class") - ? ("Field:" + KernelArg->getName().str() + ", ") - : ""); - } - - void addParam(const FieldDecl *FD, QualType FieldTy) { - std::string KernelArgDescription = ""; - const RecordDecl *RD = FD->getParent(); - if (RD && RD->hasAttr()) - KernelArgDescription = "decomposed struct/class"; - - addParam(FD, FieldTy, KernelArgDescription); - } - - // Handles base classes. 
- void addParam(const CXXBaseSpecifier &, QualType KernelArgType, - StringRef KernelArgDescription, - bool IsCompilerGeneratedType = false) { - unsigned KernelArgSize = SemaSYCLRef.getASTContext() - .getTypeSizeInChars(KernelArgType) - .getQuantity(); - SemaSYCLRef.getDiagnostics().getSYCLOptReport().AddKernelArgs( - DC.getKernelDecl(), KernelArgType.getAsString(), - IsCompilerGeneratedType ? "Compiler generated" - : KernelArgType.getAsString(), - KernelInvocationLoc, KernelArgSize, - getKernelArgDesc(KernelArgDescription), ""); - } - - // Handles specialization constants. - void addParam(QualType KernelArgType, std::string KernelArgDescription) { - unsigned KernelArgSize = SemaSYCLRef.getASTContext() - .getTypeSizeInChars(KernelArgType) - .getQuantity(); - SemaSYCLRef.getDiagnostics().getSYCLOptReport().AddKernelArgs( - DC.getKernelDecl(), "", KernelArgType.getAsString(), - KernelInvocationLoc, KernelArgSize, - getKernelArgDesc(KernelArgDescription), ""); - } - -public: - static constexpr const bool VisitInsideSimpleContainers = false; - SyclOptReportCreator(SemaSYCL &S, SyclKernelDeclCreator &DC, - SourceLocation Loc) - : SyclKernelFieldHandler(S), DC(DC), KernelInvocationLoc(Loc) {} - - using SyclKernelFieldHandler::handleSyclSpecialType; - bool handleSyclSpecialType(FieldDecl *FD, QualType FieldTy) final { - for (const auto *Param : DC.getParamVarDeclsForCurrentField()) - addParam(FD, Param->getType(), FieldTy.getAsString()); - return true; - } - - bool handleSyclSpecialType(const CXXRecordDecl *, const CXXBaseSpecifier &BS, - QualType FieldTy) final { - std::string KernelArgDescription = "base class " + FieldTy.getAsString(); - for (const auto *Param : DC.getParamVarDeclsForCurrentField()) { - QualType KernelArgType = Param->getType(); - unsigned KernelArgSize = SemaSYCLRef.getASTContext() - .getTypeSizeInChars(KernelArgType) - .getQuantity(); - SemaSYCLRef.getDiagnostics().getSYCLOptReport().AddKernelArgs( - DC.getKernelDecl(), FieldTy.getAsString(), - 
KernelArgType.getAsString(), KernelInvocationLoc, KernelArgSize, - getKernelArgDesc(KernelArgDescription), ""); - } - return true; - } - - using SyclKernelFieldHandler::handlePointerType; - bool handlePointerType(FieldDecl *FD, QualType FieldTy) final { - std::string KernelArgDescription = ""; - bool IsCompilerGeneratedType = false; - ParmVarDecl *KernelParameter = DC.getParamVarDeclsForCurrentField()[0]; - // Compiler generated openCL kernel argument for current pointer field - // is not a pointer. This means we are processing a nested pointer and - // the openCL kernel argument is of type __wrapper_class. - if (!KernelParameter->getType()->isPointerType()) { - KernelArgDescription = "nested pointer"; - IsCompilerGeneratedType = true; - } - - for (const auto *Param : DC.getParamVarDeclsForCurrentField()) - addParam(FD, Param->getType(), KernelArgDescription, - IsCompilerGeneratedType); - return true; - } - - using SyclKernelFieldHandler::handleScalarType; - bool handleScalarType(FieldDecl *FD, QualType FieldTy) final { - addParam(FD, FieldTy); - return true; - } - - using SyclKernelFieldHandler::handleSimpleArrayType; - bool handleSimpleArrayType(FieldDecl *FD, QualType FieldTy) final { - // Simple arrays are always wrapped. 
- for (const auto *Param : DC.getParamVarDeclsForCurrentField()) - addParam(FD, Param->getType(), "array", /*IsCompilerGeneratedType*/ true); - return true; - } - - bool handleTopLevelStruct(const CXXRecordDecl *, QualType Ty) final { - addParam(DC.getParamVarDeclsForCurrentField()[0]->getType(), - "SYCL Functor"); - return true; - } - - using SyclKernelFieldHandler::handleNonDecompStruct; - bool handleNonDecompStruct(const CXXRecordDecl *, FieldDecl *FD, - QualType Ty) final { - CXXRecordDecl *RD = Ty->getAsCXXRecordDecl(); - assert(RD && "Type must be a C++ record type"); - if (RD->hasAttr()) - addParam(FD, Ty, "object with pointer", /*IsCompilerGeneratedType*/ true); - else - addParam(FD, Ty); - return true; - } - - bool handleNonDecompStruct(const CXXRecordDecl *, const CXXBaseSpecifier &BS, - QualType Ty) final { - CXXRecordDecl *RD = Ty->getAsCXXRecordDecl(); - assert(RD && "Type must be a C++ record type"); - if (RD->hasAttr()) - addParam(BS, Ty, "base class with pointer", - /*IsCompilerGeneratedType*/ true); - else - addParam(BS, Ty, "base class"); - return true; - } - - using SyclKernelFieldHandler::handleUnionType; - bool handleUnionType(FieldDecl *FD, QualType FieldTy) final { - return handleScalarType(FD, FieldTy); - } - - void handleSyclKernelHandlerType() { - addParam(DC.getParamVarDeclsForCurrentField()[0]->getType(), - "SYCL2020 specialization constant"); - } -}; - -static bool isESIMDKernelType(CXXMethodDecl *CallOperator) { - return (CallOperator != nullptr) && CallOperator->hasAttr(); -} - -class SyclKernelBodyCreator : public SyclKernelFieldHandler { - bool UseTopLevelKernelObj; - SyclKernelDeclCreator &DeclCreator; - llvm::SmallVector BodyStmts; - llvm::SmallVector CollectionInitExprs; - llvm::SmallVector FinalizeStmts; - // This collection contains the information required to add/remove information - // about arrays as we enter them. The InitializedEntity component is - // necessary for initializing child members. 
uin64_t is the index of the - // current element being worked on, which is updated every time we visit - // nextElement. - llvm::SmallVector, 8> ArrayInfos; - VarDecl *KernelObjClone; - std::optional VarEntity; - llvm::SmallVector MemberExprBases; - llvm::SmallVector ArrayParamBases; - FunctionDecl *KernelCallerFunc; - SourceLocation KernelCallerSrcLoc; // KernelCallerFunc source location. - // Contains a count of how many containers we're in. This is used by the - // pointer-struct-wrapping code to ensure that we don't try to wrap - // top-level pointers. - uint64_t StructDepth = 0; - VarDecl *KernelHandlerClone = nullptr; - bool IsESIMD = false; - CXXMethodDecl *CallOperator = nullptr; - - Stmt *replaceWithLocalClone(ParmVarDecl *OriginalParam, VarDecl *LocalClone, - Stmt *FunctionBody) { - // DeclRefExpr with valid source location but with decl which is not marked - // as used is invalid. - LocalClone->setIsUsed(); - std::pair MappingPair = - std::make_pair(OriginalParam, LocalClone); - KernelBodyTransform KBT(MappingPair, SemaSYCLRef.SemaRef); - return KBT.TransformStmt(FunctionBody).get(); - } - - // Using the statements/init expressions that we've created, this generates - // the kernel body compound stmt. CompoundStmt needs to know its number of - // statements in advance to allocate it, so we cannot do this as we go along. 
- CompoundStmt *createKernelBody() { - // Push the Kernel function scope to ensure the scope isn't empty - SemaSYCLRef.SemaRef.PushFunctionScope(); - - if (!UseTopLevelKernelObj) { - // Initialize kernel object local clone - assert(CollectionInitExprs.size() == 1 && - "Should have been popped down to just the first one"); - KernelObjClone->setInit(CollectionInitExprs.back()); - } - - // Replace references to the kernel object in kernel body, to use the - // compiler generated local clone - Stmt *NewBody = - replaceWithLocalClone(KernelCallerFunc->getParamDecl(0), KernelObjClone, - KernelCallerFunc->getBody()); - - // If kernel_handler argument is passed by SYCL kernel, replace references - // to this argument in kernel body, to use the compiler generated local - // clone - if (ParmVarDecl *KernelHandlerParam = - getSyclKernelHandlerArg(KernelCallerFunc)) - NewBody = replaceWithLocalClone(KernelHandlerParam, KernelHandlerClone, - NewBody); - - // Use transformed body (with clones) as kernel body - BodyStmts.push_back(NewBody); - - BodyStmts.insert(BodyStmts.end(), FinalizeStmts.begin(), - FinalizeStmts.end()); - - return CompoundStmt::Create(SemaSYCLRef.getASTContext(), BodyStmts, - FPOptionsOverride(), {}, {}); - } - - void annotateHierarchicalParallelismAPICalls() { - // Is this a hierarchical parallelism kernel invocation? - if (getKernelInvocationKind(KernelCallerFunc) != InvokeParallelForWorkGroup) - return; - - // Mark kernel object with work-group scope attribute to avoid work-item - // scope memory allocation. - KernelObjClone->addAttr(SYCLScopeAttr::CreateImplicit( - SemaSYCLRef.getASTContext(), SYCLScopeAttr::Level::WorkGroup)); - - assert(CallOperator && "non callable object is passed as kernel obj"); - // Mark the function that it "works" in a work group scope: - // NOTE: In case of wait_for the marker call itself is - // marked with work item scope attribute, here the '()' operator of the - // object passed as parameter is marked. 
This is an optimization - - // there are a lot of locals created at parallel_for_work_group - // scope before calling the lambda - it is more efficient to have - // all of them in the private address space rather then sharing via - // the local AS. See parallel_for_work_group implementation in the - // SYCL headers. - if (!CallOperator->hasAttr()) { - CallOperator->addAttr(SYCLScopeAttr::CreateImplicit( - SemaSYCLRef.getASTContext(), SYCLScopeAttr::Level::WorkGroup)); - // Search and mark wait_for calls: - MarkWIScopeFnVisitor MarkWIScope(SemaSYCLRef.getASTContext()); - MarkWIScope.TraverseDecl(CallOperator); - // Now mark local variables declared in the PFWG lambda with work group - // scope attribute - addScopeAttrToLocalVars(*CallOperator); - } - } - - // Creates a DeclRefExpr to the ParmVar that represents the current field. - Expr *createParamReferenceExpr() { - ParmVarDecl *KernelParameter = - DeclCreator.getParamVarDeclsForCurrentField()[0]; - - QualType ParamType = KernelParameter->getOriginalType(); - Expr *DRE = SemaSYCLRef.SemaRef.BuildDeclRefExpr( - KernelParameter, ParamType, VK_LValue, KernelCallerSrcLoc); - return DRE; - } - - // Creates a DeclRefExpr to the ParmVar that represents the current pointer - // field. - Expr *createPointerParamReferenceExpr(QualType PointerTy, bool Wrapped) { - ParmVarDecl *KernelParameter = - DeclCreator.getParamVarDeclsForCurrentField()[0]; - - QualType ParamType = KernelParameter->getOriginalType(); - Expr *DRE = SemaSYCLRef.SemaRef.BuildDeclRefExpr( - KernelParameter, ParamType, VK_LValue, KernelCallerSrcLoc); - - // Struct Type kernel arguments are decomposed. The pointer fields are - // then wrapped inside a compiler generated struct. Therefore when - // generating the initializers, we have to 'unwrap' the pointer. 
- if (Wrapped) { - CXXRecordDecl *WrapperStruct = ParamType->getAsCXXRecordDecl(); - // Pointer field wrapped inside __wrapper_class - FieldDecl *Pointer = *(WrapperStruct->field_begin()); - DRE = buildMemberExpr(DRE, Pointer); - ParamType = Pointer->getType(); - } - - DRE = ImplicitCastExpr::Create(SemaSYCLRef.getASTContext(), ParamType, - CK_LValueToRValue, DRE, /*BasePath=*/nullptr, - VK_PRValue, FPOptionsOverride()); - - if (PointerTy->getPointeeType().getAddressSpace() != - ParamType->getPointeeType().getAddressSpace()) - DRE = ImplicitCastExpr::Create(SemaSYCLRef.getASTContext(), PointerTy, - CK_AddressSpaceConversion, DRE, nullptr, - VK_PRValue, FPOptionsOverride()); - - return DRE; - } - - Expr *createSimpleArrayParamReferenceExpr(QualType ArrayTy) { - ParmVarDecl *KernelParameter = - DeclCreator.getParamVarDeclsForCurrentField()[0]; - QualType ParamType = KernelParameter->getOriginalType(); - Expr *DRE = SemaSYCLRef.SemaRef.BuildDeclRefExpr( - KernelParameter, ParamType, VK_LValue, KernelCallerSrcLoc); - - // Unwrap the array. - CXXRecordDecl *WrapperStruct = ParamType->getAsCXXRecordDecl(); - FieldDecl *ArrayField = *(WrapperStruct->field_begin()); - return buildMemberExpr(DRE, ArrayField); - } - - // Creates an initialized entity for a field/item. In the case where this is a - // field, returns a normal member initializer, if we're in a sub-array of a MD - // array, returns an element initializer. 
- InitializedEntity getFieldEntity(FieldDecl *FD, QualType Ty) { - if (isArrayElement(FD, Ty)) - return InitializedEntity::InitializeElement(SemaSYCLRef.getASTContext(), - ArrayInfos.back().second, - ArrayInfos.back().first); - return InitializedEntity::InitializeMember(FD, &VarEntity.value()); - } - - void addFieldInit(FieldDecl *FD, QualType Ty, MultiExprArg ParamRef) { - InitializationKind InitKind = - InitializationKind::CreateCopy(KernelCallerSrcLoc, KernelCallerSrcLoc); - addFieldInit(FD, Ty, ParamRef, InitKind); - } - - void addFieldInit(FieldDecl *FD, QualType Ty, MultiExprArg ParamRef, - InitializationKind InitKind) { - addFieldInit(FD, Ty, ParamRef, InitKind, getFieldEntity(FD, Ty)); - } - - void addFieldInit(FieldDecl *FD, QualType Ty, MultiExprArg ParamRef, - InitializationKind InitKind, InitializedEntity Entity) { - InitializationSequence InitSeq(SemaSYCLRef.SemaRef, Entity, InitKind, - ParamRef); - ExprResult Init = - InitSeq.Perform(SemaSYCLRef.SemaRef, Entity, InitKind, ParamRef); - - InitListExpr *ParentILE = CollectionInitExprs.back(); - ParentILE->updateInit(SemaSYCLRef.getASTContext(), ParentILE->getNumInits(), - Init.get()); - } - - void addBaseInit(const CXXBaseSpecifier &BS, QualType Ty, - InitializationKind InitKind) { - InitializedEntity Entity = InitializedEntity::InitializeBase( - SemaSYCLRef.getASTContext(), &BS, /*IsInheritedVirtualBase*/ false, - &VarEntity.value()); - InitializationSequence InitSeq(SemaSYCLRef.SemaRef, Entity, InitKind, - std::nullopt); - ExprResult Init = - InitSeq.Perform(SemaSYCLRef.SemaRef, Entity, InitKind, std::nullopt); - - InitListExpr *ParentILE = CollectionInitExprs.back(); - ParentILE->updateInit(SemaSYCLRef.getASTContext(), ParentILE->getNumInits(), - Init.get()); - } - - void addBaseInit(const CXXBaseSpecifier &BS, QualType Ty, - InitializationKind InitKind, MultiExprArg Args) { - InitializedEntity Entity = InitializedEntity::InitializeBase( - SemaSYCLRef.getASTContext(), &BS, /*IsInheritedVirtualBase*/ 
false, - &VarEntity.value()); - InitializationSequence InitSeq(SemaSYCLRef.SemaRef, Entity, InitKind, Args); - ExprResult Init = - InitSeq.Perform(SemaSYCLRef.SemaRef, Entity, InitKind, Args); - - InitListExpr *ParentILE = CollectionInitExprs.back(); - ParentILE->updateInit(SemaSYCLRef.getASTContext(), ParentILE->getNumInits(), - Init.get()); - } - - void addSimpleBaseInit(const CXXBaseSpecifier &BS, QualType Ty) { - InitializationKind InitKind = - InitializationKind::CreateCopy(KernelCallerSrcLoc, KernelCallerSrcLoc); - - InitializedEntity Entity = InitializedEntity::InitializeBase( - SemaSYCLRef.getASTContext(), &BS, /*IsInheritedVirtualBase*/ false, - &VarEntity.value()); - - Expr *ParamRef = createParamReferenceExpr(); - InitializationSequence InitSeq(SemaSYCLRef.SemaRef, Entity, InitKind, - ParamRef); - ExprResult Init = - InitSeq.Perform(SemaSYCLRef.SemaRef, Entity, InitKind, ParamRef); - - InitListExpr *ParentILE = CollectionInitExprs.back(); - ParentILE->updateInit(SemaSYCLRef.getASTContext(), ParentILE->getNumInits(), - Init.get()); - } - - // Adds an initializer that handles a simple initialization of a field. 
- void addSimpleFieldInit(FieldDecl *FD, QualType Ty) { - Expr *ParamRef = createParamReferenceExpr(); - addFieldInit(FD, Ty, ParamRef); - } - - Expr *createGetAddressOf(Expr *E) { - return UnaryOperator::Create( - SemaSYCLRef.getASTContext(), E, UO_AddrOf, - SemaSYCLRef.getASTContext().getPointerType(E->getType()), VK_PRValue, - OK_Ordinary, KernelCallerSrcLoc, false, - SemaSYCLRef.SemaRef.CurFPFeatureOverrides()); - } - - Expr *createDerefOp(Expr *E) { - return UnaryOperator::Create(SemaSYCLRef.getASTContext(), E, UO_Deref, - E->getType()->getPointeeType(), VK_LValue, - OK_Ordinary, KernelCallerSrcLoc, false, - SemaSYCLRef.SemaRef.CurFPFeatureOverrides()); - } - - Expr *createReinterpretCastExpr(Expr *E, QualType To) { - return CXXReinterpretCastExpr::Create( - SemaSYCLRef.getASTContext(), To, VK_PRValue, CK_BitCast, E, - /*Path=*/nullptr, - SemaSYCLRef.getASTContext().getTrivialTypeSourceInfo(To), - SourceLocation(), SourceLocation(), SourceRange()); - } - - void handleGeneratedType(FieldDecl *FD, QualType Ty) { - // Equivalent of the following code is generated here: - // void ocl_kernel(__generated_type GT) { - // Kernel KernelObjClone { *(reinterpret_cast(>)) }; - // } - - Expr *RCE = createReinterpretCastExpr( - createGetAddressOf(createParamReferenceExpr()), - SemaSYCLRef.getASTContext().getPointerType(Ty)); - Expr *Initializer = createDerefOp(RCE); - addFieldInit(FD, Ty, Initializer); - } - - void handleGeneratedType(const CXXRecordDecl *RD, const CXXBaseSpecifier &BS, - QualType Ty) { - // Equivalent of the following code is generated here: - // void ocl_kernel(__generated_type GT) { - // Kernel KernelObjClone { *(reinterpret_cast(>)) }; - // } - Expr *RCE = createReinterpretCastExpr( - createGetAddressOf(createParamReferenceExpr()), - SemaSYCLRef.getASTContext().getPointerType(Ty)); - Expr *Initializer = createDerefOp(RCE); - InitializationKind InitKind = - InitializationKind::CreateCopy(KernelCallerSrcLoc, KernelCallerSrcLoc); - addBaseInit(BS, Ty, 
InitKind, Initializer); - } - - MemberExpr *buildMemberExpr(Expr *Base, ValueDecl *Member) { - DeclAccessPair MemberDAP = DeclAccessPair::make(Member, AS_none); - MemberExpr *Result = SemaSYCLRef.SemaRef.BuildMemberExpr( - Base, /*IsArrow */ false, KernelCallerSrcLoc, NestedNameSpecifierLoc(), - KernelCallerSrcLoc, Member, MemberDAP, - /*HadMultipleCandidates*/ false, - DeclarationNameInfo(Member->getDeclName(), KernelCallerSrcLoc), - Member->getType(), VK_LValue, OK_Ordinary); - return Result; - } - - void addFieldMemberExpr(FieldDecl *FD, QualType Ty) { - if (!isArrayElement(FD, Ty)) - MemberExprBases.push_back(buildMemberExpr(MemberExprBases.back(), FD)); - } - - void removeFieldMemberExpr(const FieldDecl *FD, QualType Ty) { - if (!isArrayElement(FD, Ty)) - MemberExprBases.pop_back(); - } - - void createSpecialMethodCall(const CXXRecordDecl *RD, StringRef MethodName, - SmallVectorImpl &AddTo) { - CXXMethodDecl *Method = getMethodByName(RD, MethodName); - if (!Method) - return; - - unsigned NumParams = Method->getNumParams(); - llvm::SmallVector ParamDREs(NumParams); - llvm::ArrayRef KernelParameters = - DeclCreator.getParamVarDeclsForCurrentField(); - for (size_t I = 0; I < NumParams; ++I) { - QualType ParamType = KernelParameters[I]->getOriginalType(); - ParamDREs[I] = SemaSYCLRef.SemaRef.BuildDeclRefExpr( - KernelParameters[I], ParamType, VK_LValue, KernelCallerSrcLoc); - } - - MemberExpr *MethodME = buildMemberExpr(MemberExprBases.back(), Method); - - QualType ResultTy = Method->getReturnType(); - ExprValueKind VK = Expr::getValueKindForType(ResultTy); - ResultTy = ResultTy.getNonLValueExprType(SemaSYCLRef.getASTContext()); - llvm::SmallVector ParamStmts; - const auto *Proto = cast(Method->getType()); - SemaSYCLRef.SemaRef.GatherArgumentsForCall(KernelCallerSrcLoc, Method, - Proto, 0, ParamDREs, ParamStmts); - // [kernel_obj or wrapper object].accessor.__init(_ValueType*, - // range, range, id) - AddTo.push_back(CXXMemberCallExpr::Create( - 
SemaSYCLRef.getASTContext(), MethodME, ParamStmts, ResultTy, VK, - KernelCallerSrcLoc, FPOptionsOverride())); - } - - // Creates an empty InitListExpr of the correct number of child-inits - // of this to append into. - void addCollectionInitListExpr(const CXXRecordDecl *RD) { - const ASTRecordLayout &Info = - SemaSYCLRef.getASTContext().getASTRecordLayout(RD); - uint64_t NumInitExprs = Info.getFieldCount() + RD->getNumBases(); - addCollectionInitListExpr(QualType(RD->getTypeForDecl(), 0), NumInitExprs); - } - - InitListExpr *createInitListExpr(const CXXRecordDecl *RD) { - const ASTRecordLayout &Info = - SemaSYCLRef.getASTContext().getASTRecordLayout(RD); - uint64_t NumInitExprs = Info.getFieldCount() + RD->getNumBases(); - return createInitListExpr(QualType(RD->getTypeForDecl(), 0), NumInitExprs); - } - - InitListExpr *createInitListExpr(QualType InitTy, uint64_t NumChildInits) { - InitListExpr *ILE = new (SemaSYCLRef.getASTContext()) - InitListExpr(SemaSYCLRef.getASTContext(), KernelCallerSrcLoc, {}, - KernelCallerSrcLoc); - ILE->reserveInits(SemaSYCLRef.getASTContext(), NumChildInits); - ILE->setType(InitTy); - - return ILE; - } - - // Create an empty InitListExpr of the type/size for the rest of the visitor - // to append into. - void addCollectionInitListExpr(QualType InitTy, uint64_t NumChildInits) { - - InitListExpr *ILE = createInitListExpr(InitTy, NumChildInits); - InitListExpr *ParentILE = CollectionInitExprs.back(); - ParentILE->updateInit(SemaSYCLRef.getASTContext(), ParentILE->getNumInits(), - ILE); - - CollectionInitExprs.push_back(ILE); - } - - static VarDecl *createKernelObjClone(ASTContext &Ctx, DeclContext *DC, - const CXXRecordDecl *KernelObj) { - TypeSourceInfo *TSInfo = - KernelObj->isLambda() ? 
KernelObj->getLambdaTypeInfo() : nullptr; - IdentifierInfo *Ident = KernelObj->getIdentifier(); - if (!Ident) - Ident = &Ctx.Idents.get("__SYCLKernel"); - - VarDecl *VD = VarDecl::Create( - Ctx, DC, KernelObj->getLocation(), KernelObj->getLocation(), Ident, - QualType(KernelObj->getTypeForDecl(), 0), TSInfo, SC_None); - return VD; - } - - const llvm::StringLiteral getInitMethodName() const { - return IsESIMD ? InitESIMDMethodName : InitMethodName; - } - - // Default inits the type, then calls the init-method in the body. - bool handleSpecialType(FieldDecl *FD, QualType Ty) { - addFieldInit(FD, Ty, std::nullopt, - InitializationKind::CreateDefault(KernelCallerSrcLoc)); - - addFieldMemberExpr(FD, Ty); - - const auto *RecordDecl = Ty->getAsCXXRecordDecl(); - createSpecialMethodCall(RecordDecl, getInitMethodName(), BodyStmts); - CXXMethodDecl *FinalizeMethod = - getMethodByName(RecordDecl, FinalizeMethodName); - // A finalize-method is expected for special type such as stream. - if (FinalizeMethod) - createSpecialMethodCall(RecordDecl, FinalizeMethodName, FinalizeStmts); - - removeFieldMemberExpr(FD, Ty); - - return true; - } - - bool handleSpecialType(const CXXBaseSpecifier &BS, QualType Ty) { - const auto *RecordDecl = Ty->getAsCXXRecordDecl(); - addBaseInit(BS, Ty, InitializationKind::CreateDefault(KernelCallerSrcLoc)); - createSpecialMethodCall(RecordDecl, getInitMethodName(), BodyStmts); - return true; - } - - // Generate __init call for kernel handler argument - void handleSpecialType(QualType KernelHandlerTy) { - DeclRefExpr *KernelHandlerCloneRef = DeclRefExpr::Create( - SemaSYCLRef.getASTContext(), NestedNameSpecifierLoc(), - KernelCallerSrcLoc, KernelHandlerClone, false, DeclarationNameInfo(), - KernelHandlerTy, VK_LValue); - const auto *RecordDecl = - KernelHandlerClone->getType()->getAsCXXRecordDecl(); - MemberExprBases.push_back(KernelHandlerCloneRef); - createSpecialMethodCall(RecordDecl, InitSpecConstantsBuffer, BodyStmts); - MemberExprBases.pop_back(); 
- } - - void createKernelHandlerClone(ASTContext &Ctx, DeclContext *DC, - ParmVarDecl *KernelHandlerArg) { - QualType Ty = KernelHandlerArg->getType(); - TypeSourceInfo *TSInfo = Ctx.getTrivialTypeSourceInfo(Ty); - KernelHandlerClone = - VarDecl::Create(Ctx, DC, KernelCallerSrcLoc, KernelCallerSrcLoc, - KernelHandlerArg->getIdentifier(), Ty, TSInfo, SC_None); - - // Default initialize clone - InitializedEntity VarEntity = - InitializedEntity::InitializeVariable(KernelHandlerClone); - InitializationKind InitKind = - InitializationKind::CreateDefault(KernelCallerSrcLoc); - InitializationSequence InitSeq(SemaSYCLRef.SemaRef, VarEntity, InitKind, - std::nullopt); - ExprResult Init = - InitSeq.Perform(SemaSYCLRef.SemaRef, VarEntity, InitKind, std::nullopt); - KernelHandlerClone->setInit( - SemaSYCLRef.SemaRef.MaybeCreateExprWithCleanups(Init.get())); - KernelHandlerClone->setInitStyle(VarDecl::CallInit); - } - - Expr *createArraySubscriptExpr(uint64_t Index, Expr *ArrayRef) { - QualType SizeT = SemaSYCLRef.getASTContext().getSizeType(); - llvm::APInt IndexVal{ - static_cast(SemaSYCLRef.getASTContext().getTypeSize(SizeT)), - Index, SizeT->isSignedIntegerType()}; - auto IndexLiteral = IntegerLiteral::Create( - SemaSYCLRef.getASTContext(), IndexVal, SizeT, KernelCallerSrcLoc); - ExprResult IndexExpr = SemaSYCLRef.SemaRef.CreateBuiltinArraySubscriptExpr( - ArrayRef, KernelCallerSrcLoc, IndexLiteral, KernelCallerSrcLoc); - assert(!IndexExpr.isInvalid()); - return IndexExpr.get(); - } - - void addSimpleArrayInit(FieldDecl *FD, QualType FieldTy) { - Expr *ArrayRef = createSimpleArrayParamReferenceExpr(FieldTy); - InitializationKind InitKind = InitializationKind::CreateDirect({}, {}, {}); - - InitializedEntity Entity = InitializedEntity::InitializeMember( - FD, &VarEntity.value(), /*Implicit*/ true); - - addFieldInit(FD, FieldTy, ArrayRef, InitKind, Entity); - } - - void addArrayElementInit(FieldDecl *FD, QualType T) { - Expr *RCE = createReinterpretCastExpr( - 
createGetAddressOf(ArrayParamBases.pop_back_val()), - SemaSYCLRef.getASTContext().getPointerType(T)); - Expr *Initializer = createDerefOp(RCE); - addFieldInit(FD, T, Initializer); - } - - // This function is recursive in order to handle - // multi-dimensional arrays. If the array element is - // an array, it implies that the array is multi-dimensional. - // We continue recursion till we reach a non-array element to - // generate required array subscript expressions. - void createArrayInit(FieldDecl *FD, QualType T) { - const ConstantArrayType *CAT = - SemaSYCLRef.getASTContext().getAsConstantArrayType(T); - - if (!CAT) { - addArrayElementInit(FD, T); - return; - } - - QualType ET = CAT->getElementType(); - uint64_t ElemCount = CAT->getSize().getZExtValue(); - enterArray(FD, T, ET); - - for (uint64_t Index = 0; Index < ElemCount; ++Index) { - ArrayInfos.back().second = Index; - Expr *ArraySubscriptExpr = - createArraySubscriptExpr(Index, ArrayParamBases.back()); - ArrayParamBases.push_back(ArraySubscriptExpr); - createArrayInit(FD, ET); - } - - leaveArray(FD, T, ET); - } - - // This function is used to create initializers for a top - // level array which contains pointers. The openCl kernel - // parameter for this array will be a wrapper class - // which contains the generated type. 
This function generates - // code equivalent to: - // void ocl_kernel(__wrapper_class WrappedGT) { - // Kernel KernelObjClone { - // *reinterpret_cast(&WrappedGT.GeneratedArr[0]), - // *reinterpret_cast(&WrappedGT.GeneratedArr[1]), - // *reinterpret_cast(&WrappedGT.GeneratedArr[2]) - // }; - // } - void handleGeneratedArrayType(FieldDecl *FD, QualType FieldTy) { - ArrayParamBases.push_back(createSimpleArrayParamReferenceExpr(FieldTy)); - createArrayInit(FD, FieldTy); - } - -public: - static constexpr const bool VisitInsideSimpleContainers = false; - SyclKernelBodyCreator(SemaSYCL &S, SyclKernelDeclCreator &DC, - const CXXRecordDecl *KernelObj, - FunctionDecl *KernelCallerFunc, bool IsSIMDKernel, - CXXMethodDecl *CallOperator) - : SyclKernelFieldHandler(S), - UseTopLevelKernelObj(KernelObjVisitor::useTopLevelKernelObj(KernelObj)), - DeclCreator(DC), - KernelObjClone(UseTopLevelKernelObj - ? nullptr - : createKernelObjClone(S.getASTContext(), - DC.getKernelDecl(), - KernelObj)), - VarEntity(), KernelCallerFunc(KernelCallerFunc), - KernelCallerSrcLoc(KernelCallerFunc->getLocation()), - IsESIMD(IsSIMDKernel), CallOperator(CallOperator) { - if (!UseTopLevelKernelObj) { - VarEntity.emplace(InitializedEntity::InitializeVariable(KernelObjClone)); - Stmt *DS = new (S.getASTContext()) DeclStmt( - DeclGroupRef(KernelObjClone), KernelCallerSrcLoc, KernelCallerSrcLoc); - BodyStmts.push_back(DS); - CollectionInitExprs.push_back(createInitListExpr(KernelObj)); - DeclRefExpr *KernelObjCloneRef = DeclRefExpr::Create( - S.getASTContext(), NestedNameSpecifierLoc(), KernelCallerSrcLoc, - KernelObjClone, false, DeclarationNameInfo(), - QualType(KernelObj->getTypeForDecl(), 0), VK_LValue); - MemberExprBases.push_back(KernelObjCloneRef); - } - } - - ~SyclKernelBodyCreator() { - annotateHierarchicalParallelismAPICalls(); - CompoundStmt *KernelBody = createKernelBody(); - DeclCreator.setBody(KernelBody); - } - - bool handleSyclSpecialType(FieldDecl *FD, QualType Ty) final { - return 
handleSpecialType(FD, Ty); - } - - bool handleSyclSpecialType(const CXXRecordDecl *, const CXXBaseSpecifier &BS, - QualType Ty) final { - return handleSpecialType(BS, Ty); - } - - bool handlePointerType(FieldDecl *FD, QualType FieldTy) final { - Expr *PointerRef = - createPointerParamReferenceExpr(FieldTy, StructDepth != 0); - addFieldInit(FD, FieldTy, PointerRef); - return true; - } - - bool handleSimpleArrayType(FieldDecl *FD, QualType FieldTy) final { - if (FD->hasAttr()) - handleGeneratedArrayType(FD, FieldTy); - else - addSimpleArrayInit(FD, FieldTy); - return true; - } - - bool handleTopLevelStruct(const CXXRecordDecl *, QualType Ty) final { - // As the functor is passed as a whole, use the param as the vardecl - // otherwise used as the clone. - KernelObjClone = DeclCreator.getParamVarDeclsForCurrentField()[0]; - return true; - } - - bool handleNonDecompStruct(const CXXRecordDecl *, FieldDecl *FD, - QualType Ty) final { - CXXRecordDecl *RD = Ty->getAsCXXRecordDecl(); - assert(RD && "Type must be a C++ record type"); - if (RD->hasAttr()) - handleGeneratedType(FD, Ty); - else - addSimpleFieldInit(FD, Ty); - return true; - } - - bool handleNonDecompStruct(const CXXRecordDecl *RD, - const CXXBaseSpecifier &BS, QualType Ty) final { - CXXRecordDecl *BaseDecl = Ty->getAsCXXRecordDecl(); - assert(BaseDecl && "Type must be a C++ record type"); - if (BaseDecl->hasAttr()) - handleGeneratedType(RD, BS, Ty); - else - addSimpleBaseInit(BS, Ty); - return true; - } - - bool handleScalarType(FieldDecl *FD, QualType FieldTy) final { - addSimpleFieldInit(FD, FieldTy); - return true; - } - - bool handleUnionType(FieldDecl *FD, QualType FieldTy) final { - addSimpleFieldInit(FD, FieldTy); - return true; - } - - // Default inits the type, then calls the init-method in the body - void handleSyclKernelHandlerType(ParmVarDecl *KernelHandlerArg) { - - // Create and default initialize local clone of kernel handler - createKernelHandlerClone(SemaSYCLRef.getASTContext(), - 
DeclCreator.getKernelDecl(), KernelHandlerArg); - - // Add declaration statement to openCL kernel body - Stmt *DS = new (SemaSYCLRef.getASTContext()) - DeclStmt(DeclGroupRef(KernelHandlerClone), KernelCallerSrcLoc, - KernelCallerSrcLoc); - BodyStmts.push_back(DS); - - // Generate - // KernelHandlerClone.__init_specialization_constants_buffer(specialization_constants_buffer) - // call if target does not have native support for specialization constants. - // Here, specialization_constants_buffer is the compiler generated kernel - // argument of type char*. - if (!isDefaultSPIRArch(SemaSYCLRef.getASTContext())) - handleSpecialType(KernelHandlerArg->getType()); - } - - bool enterStruct(const CXXRecordDecl *RD, FieldDecl *FD, QualType Ty) final { - ++StructDepth; - addCollectionInitListExpr(Ty->getAsCXXRecordDecl()); - - addFieldMemberExpr(FD, Ty); - return true; - } - - bool leaveStruct(const CXXRecordDecl *, FieldDecl *FD, QualType Ty) final { - --StructDepth; - CollectionInitExprs.pop_back(); - - removeFieldMemberExpr(FD, Ty); - return true; - } - - bool enterStruct(const CXXRecordDecl *RD, const CXXBaseSpecifier &BS, - QualType) final { - ++StructDepth; - - CXXCastPath BasePath; - QualType DerivedTy(RD->getTypeForDecl(), 0); - QualType BaseTy = BS.getType(); - SemaSYCLRef.SemaRef.CheckDerivedToBaseConversion( - DerivedTy, BaseTy, KernelCallerSrcLoc, SourceRange(), &BasePath, - /*IgnoreBaseAccess*/ true); - auto Cast = ImplicitCastExpr::Create( - SemaSYCLRef.getASTContext(), BaseTy, CK_DerivedToBase, - MemberExprBases.back(), - /* CXXCastPath=*/&BasePath, VK_LValue, FPOptionsOverride()); - MemberExprBases.push_back(Cast); - addCollectionInitListExpr(BaseTy->getAsCXXRecordDecl()); - return true; - } - - bool leaveStruct(const CXXRecordDecl *RD, const CXXBaseSpecifier &BS, - QualType) final { - --StructDepth; - MemberExprBases.pop_back(); - CollectionInitExprs.pop_back(); - return true; - } - - bool enterArray(FieldDecl *FD, QualType ArrayType, - QualType ElementType) 
final { - const ConstantArrayType *CAT = - SemaSYCLRef.getASTContext().getAsConstantArrayType(ArrayType); - assert(CAT && "Should only be called on constant-size array."); - uint64_t ArraySize = CAT->getSize().getZExtValue(); - addCollectionInitListExpr(ArrayType, ArraySize); - ArrayInfos.emplace_back(getFieldEntity(FD, ArrayType), 0); - - // If this is the top-level array, we need to make a MemberExpr in addition - // to an array subscript. - addFieldMemberExpr(FD, ArrayType); - return true; - } - - bool nextElement(QualType, uint64_t Index) final { - ArrayInfos.back().second = Index; - - // Pop off the last member expr base. - if (Index != 0) - MemberExprBases.pop_back(); - - MemberExprBases.push_back( - createArraySubscriptExpr(Index, MemberExprBases.back())); - return true; - } - - bool leaveArray(FieldDecl *FD, QualType ArrayType, - QualType ElementType) final { - CollectionInitExprs.pop_back(); - ArrayInfos.pop_back(); - - // Remove the IndexExpr. - if (!FD->hasAttr()) - MemberExprBases.pop_back(); - else - ArrayParamBases.pop_back(); - - // Remove the field access expr as well. - removeFieldMemberExpr(FD, ArrayType); - return true; - } - using SyclKernelFieldHandler::enterArray; - using SyclKernelFieldHandler::enterStruct; - using SyclKernelFieldHandler::handleNonDecompStruct; - using SyclKernelFieldHandler::handlePointerType; - using SyclKernelFieldHandler::handleScalarType; - using SyclKernelFieldHandler::handleSyclSpecialType; - using SyclKernelFieldHandler::handleUnionType; - using SyclKernelFieldHandler::leaveArray; - using SyclKernelFieldHandler::leaveStruct; -}; - -class FreeFunctionKernelBodyCreator : public SyclKernelFieldHandler { - SyclKernelDeclCreator &DeclCreator; - llvm::SmallVector BodyStmts; - FunctionDecl *FreeFunc = nullptr; - SourceLocation FreeFunctionSrcLoc; // Free function source location. - llvm::SmallVector ArgExprs; - - // Creates a DeclRefExpr to the ParmVar that represents the current free - // function parameter. 
- Expr *createParamReferenceExpr() { - ParmVarDecl *FreeFunctionParameter = - DeclCreator.getParamVarDeclsForCurrentField()[0]; - - QualType FreeFunctionParamType = FreeFunctionParameter->getOriginalType(); - Expr *DRE = SemaSYCLRef.SemaRef.BuildDeclRefExpr( - FreeFunctionParameter, FreeFunctionParamType, VK_LValue, - FreeFunctionSrcLoc); - DRE = SemaSYCLRef.SemaRef.DefaultLvalueConversion(DRE).get(); - return DRE; - } - - // Creates a DeclRefExpr to the ParmVar that represents the current pointer - // parameter. - Expr *createPointerParamReferenceExpr(QualType PointerTy) { - ParmVarDecl *FreeFunctionParameter = - DeclCreator.getParamVarDeclsForCurrentField()[0]; - - QualType FreeFunctionParamType = FreeFunctionParameter->getOriginalType(); - Expr *DRE = SemaSYCLRef.SemaRef.BuildDeclRefExpr( - FreeFunctionParameter, FreeFunctionParamType, VK_LValue, - FreeFunctionSrcLoc); - DRE = SemaSYCLRef.SemaRef.DefaultLvalueConversion(DRE).get(); - - if (PointerTy->getPointeeType().getAddressSpace() != - FreeFunctionParamType->getPointeeType().getAddressSpace()) - DRE = ImplicitCastExpr::Create(SemaSYCLRef.getASTContext(), PointerTy, - CK_AddressSpaceConversion, DRE, nullptr, - VK_PRValue, FPOptionsOverride()); - return DRE; - } - - Expr *createGetAddressOf(Expr *E) { - return UnaryOperator::Create( - SemaSYCLRef.getASTContext(), E, UO_AddrOf, - SemaSYCLRef.getASTContext().getPointerType(E->getType()), VK_PRValue, - OK_Ordinary, SourceLocation(), false, - SemaSYCLRef.SemaRef.CurFPFeatureOverrides()); - } - - Expr *createDerefOp(Expr *E) { - return UnaryOperator::Create(SemaSYCLRef.getASTContext(), E, UO_Deref, - E->getType()->getPointeeType(), VK_LValue, - OK_Ordinary, SourceLocation(), false, - SemaSYCLRef.SemaRef.CurFPFeatureOverrides()); - } - - Expr *createReinterpretCastExpr(Expr *E, QualType To) { - return CXXReinterpretCastExpr::Create( - SemaSYCLRef.getASTContext(), To, VK_PRValue, CK_BitCast, E, - /*Path=*/nullptr, - 
SemaSYCLRef.getASTContext().getTrivialTypeSourceInfo(To), - SourceLocation(), SourceLocation(), SourceRange()); - } - - Expr *createCopyInitExpr(ParmVarDecl *OrigFunctionParameter) { - Expr *DRE = createParamReferenceExpr(); - - assert(OrigFunctionParameter && "no parameter?"); - - CXXRecordDecl *RD = OrigFunctionParameter->getType()->getAsCXXRecordDecl(); - InitializedEntity Entity = InitializedEntity::InitializeParameter( - SemaSYCLRef.getASTContext(), OrigFunctionParameter); - - if (RD->hasAttr()) { - DRE = createReinterpretCastExpr( - createGetAddressOf(DRE), SemaSYCLRef.getASTContext().getPointerType( - OrigFunctionParameter->getType())); - DRE = createDerefOp(DRE); - } - - ExprResult ArgE = SemaSYCLRef.SemaRef.PerformCopyInitialization( - Entity, SourceLocation(), DRE, false, false); - return ArgE.getAs(); - } - - // For a free function such as: - // void f(int i, int* p, struct Simple S) { ... } - // - // Keep the function as-is for the version callable from device code. - // void f(int i, int *p, struct Simple S) { ... 
} - // - // For the host-callable kernel function generate this: - // void __sycl_kernel_f(int __arg_i, int* __arg_p, struct Simple __arg_S) - // { - // f(__arg_i, __arg_p, __arg_S); - // } - CompoundStmt *createFreeFunctionKernelBody() { - SemaSYCLRef.SemaRef.PushFunctionScope(); - Expr *Fn = SemaSYCLRef.SemaRef.BuildDeclRefExpr( - FreeFunc, FreeFunc->getType(), VK_LValue, FreeFunctionSrcLoc); - ASTContext &Context = SemaSYCLRef.getASTContext(); - QualType ResultTy = FreeFunc->getReturnType(); - ExprValueKind VK = Expr::getValueKindForType(ResultTy); - ResultTy = ResultTy.getNonLValueExprType(Context); - Fn = ImplicitCastExpr::Create(Context, - Context.getPointerType(FreeFunc->getType()), - CK_FunctionToPointerDecay, Fn, nullptr, - VK_PRValue, FPOptionsOverride()); - auto CallExpr = CallExpr::Create(Context, Fn, ArgExprs, ResultTy, VK, - FreeFunctionSrcLoc, FPOptionsOverride()); - BodyStmts.push_back(CallExpr); - return CompoundStmt::Create(Context, BodyStmts, FPOptionsOverride(), {}, - {}); - } - -public: - static constexpr const bool VisitInsideSimpleContainers = false; - - FreeFunctionKernelBodyCreator(SemaSYCL &S, SyclKernelDeclCreator &DC, - FunctionDecl *FF) - : SyclKernelFieldHandler(S), DeclCreator(DC), FreeFunc(FF), - FreeFunctionSrcLoc(FF->getLocation()) {} - - ~FreeFunctionKernelBodyCreator() { - CompoundStmt *KernelBody = createFreeFunctionKernelBody(); - DeclCreator.setBody(KernelBody); - } - - bool handleSyclSpecialType(FieldDecl *FD, QualType Ty) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool handleSyclSpecialType(ParmVarDecl *, QualType) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool handleSyclSpecialType(const CXXRecordDecl *, const CXXBaseSpecifier &BS, - QualType Ty) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool handlePointerType(FieldDecl *FD, QualType FieldTy) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } 
- - bool handlePointerType(ParmVarDecl *PD, QualType ParamTy) final { - Expr *PointerRef = createPointerParamReferenceExpr(ParamTy); - ArgExprs.push_back(PointerRef); - return true; - } - - bool handleSimpleArrayType(FieldDecl *FD, QualType FieldTy) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool handleNonDecompStruct(const CXXRecordDecl *, FieldDecl *FD, - QualType Ty) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool handleNonDecompStruct(const CXXRecordDecl *, ParmVarDecl *PD, - QualType) final { - Expr *TempCopy = createCopyInitExpr(PD); - ArgExprs.push_back(TempCopy); - return true; - } - - bool handleNonDecompStruct(const CXXRecordDecl *RD, - const CXXBaseSpecifier &BS, QualType Ty) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool handleScalarType(FieldDecl *FD, QualType FieldTy) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool handleScalarType(ParmVarDecl *, QualType) final { - Expr *ParamRef = createParamReferenceExpr(); - ArgExprs.push_back(ParamRef); - return true; - } - - bool handleUnionType(FieldDecl *FD, QualType FieldTy) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool handleUnionType(ParmVarDecl *, QualType) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool enterStruct(const CXXRecordDecl *RD, FieldDecl *FD, QualType Ty) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool enterStruct(const CXXRecordDecl *, ParmVarDecl *, QualType) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool leaveStruct(const CXXRecordDecl *, FieldDecl *FD, QualType Ty) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool leaveStruct(const CXXRecordDecl *, ParmVarDecl *, QualType) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool 
enterStruct(const CXXRecordDecl *RD, const CXXBaseSpecifier &BS, - QualType) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool leaveStruct(const CXXRecordDecl *RD, const CXXBaseSpecifier &BS, - QualType) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool enterArray(FieldDecl *FD, QualType ArrayType, - QualType ElementType) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool enterArray(ParmVarDecl *PD, QualType ArrayType, - QualType ElementType) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool leaveArray(FieldDecl *FD, QualType ArrayType, - QualType ElementType) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool leaveArray(ParmVarDecl *PD, QualType ArrayType, - QualType ElementType) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } -}; - -// Kernels are only the unnamed-lambda feature if the feature is enabled, AND -// the first template argument has been corrected by the library to match the -// functor type. -static bool IsSYCLUnnamedKernel(SemaSYCL &SemaSYCLRef, const FunctionDecl *FD) { - if (!SemaSYCLRef.getLangOpts().SYCLUnnamedLambda) - return false; - - QualType FunctorTy = GetSYCLKernelObjectType(FD); - QualType TmplArgTy = calculateKernelNameType(SemaSYCLRef.getASTContext(), FD); - return SemaSYCLRef.getASTContext().hasSameType(FunctorTy, TmplArgTy); -} - -class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler { - SYCLIntegrationHeader &Header; - int64_t CurOffset = 0; - llvm::SmallVector ArrayBaseOffsets; - int StructDepth = 0; - - // A series of functions to calculate the change in offset based on the type. - int64_t offsetOf(const FieldDecl *FD, QualType ArgTy) const { - return isArrayElement(FD, ArgTy) - ? 
0 - : SemaSYCLRef.getASTContext().getFieldOffset(FD) / 8; - } - // For free functions each parameter is stand-alone, so offsets within a - // lambda/function object are not relevant. Therefore offsetOf will always be - // 0. - int64_t offsetOf(const ParmVarDecl *, QualType) const { return 0; } - - int64_t offsetOf(const CXXRecordDecl *RD, const CXXRecordDecl *Base) const { - const ASTRecordLayout &Layout = - SemaSYCLRef.getASTContext().getASTRecordLayout(RD); - return Layout.getBaseClassOffset(Base).getQuantity(); - } - - void addParam(const FieldDecl *FD, QualType ArgTy, - SYCLIntegrationHeader::kernel_param_kind_t Kind) { - addParam(ArgTy, Kind, offsetOf(FD, ArgTy)); - } - - // For free functions we increment the current offset as each parameter is - // added. - void addParam(const ParmVarDecl *PD, QualType ParamTy, - SYCLIntegrationHeader::kernel_param_kind_t Kind) { - addParam(ParamTy, Kind, offsetOf(PD, ParamTy)); - CurOffset += - SemaSYCLRef.getASTContext().getTypeSizeInChars(ParamTy).getQuantity(); - } - - void addParam(QualType ParamTy, - SYCLIntegrationHeader::kernel_param_kind_t Kind, - uint64_t OffsetAdj) { - uint64_t Size; - Size = - SemaSYCLRef.getASTContext().getTypeSizeInChars(ParamTy).getQuantity(); - Header.addParamDesc(Kind, static_cast(Size), - static_cast(CurOffset + OffsetAdj)); - } - -public: - static constexpr const bool VisitInsideSimpleContainers = false; - SyclKernelIntHeaderCreator(bool IsESIMD, SemaSYCL &S, - SYCLIntegrationHeader &H, - const CXXRecordDecl *KernelObj, QualType NameType, - FunctionDecl *KernelFunc) - : SyclKernelFieldHandler(S), Header(H) { - - // The header needs to access the kernel object size. 
- int64_t ObjSize = SemaSYCLRef.getASTContext() - .getTypeSizeInChars(KernelObj->getTypeForDecl()) - .getQuantity(); - Header.startKernel(KernelFunc, NameType, KernelObj->getLocation(), IsESIMD, - IsSYCLUnnamedKernel(S, KernelFunc), ObjSize); - } - - SyclKernelIntHeaderCreator(SemaSYCL &S, SYCLIntegrationHeader &H, - QualType NameType, FunctionDecl *FreeFunc) - : SyclKernelFieldHandler(S), Header(H) { - Header.startKernel(FreeFunc, NameType, FreeFunc->getLocation(), - false /*IsESIMD*/, true /*IsSYCLUnnamedKernel*/, - 0 /*ObjSize*/); - } - - bool handleSyclSpecialType(const CXXRecordDecl *RD, - const CXXBaseSpecifier &BC, - QualType FieldTy) final { - const auto *AccTy = - cast(FieldTy->getAsRecordDecl()); - assert(AccTy->getTemplateArgs().size() >= 2 && - "Incorrect template args for Accessor Type"); - int Dims = static_cast( - AccTy->getTemplateArgs()[1].getAsIntegral().getExtValue()); - int Info = getAccessTarget(FieldTy, AccTy) | (Dims << 11); - Header.addParamDesc(SYCLIntegrationHeader::kind_accessor, Info, - CurOffset + - offsetOf(RD, BC.getType()->getAsCXXRecordDecl())); - return true; - } - - bool handleSyclSpecialType(FieldDecl *FD, QualType FieldTy) final { - const auto *ClassTy = FieldTy->getAsCXXRecordDecl(); - assert(ClassTy && "Type must be a C++ record type"); - if (isSyclAccessorType(FieldTy)) { - const auto *AccTy = - cast(FieldTy->getAsRecordDecl()); - assert(AccTy->getTemplateArgs().size() >= 2 && - "Incorrect template args for Accessor Type"); - int Dims = static_cast( - AccTy->getTemplateArgs()[1].getAsIntegral().getExtValue()); - int Info = getAccessTarget(FieldTy, AccTy) | (Dims << 11); - - Header.addParamDesc(SYCLIntegrationHeader::kind_accessor, Info, - CurOffset + offsetOf(FD, FieldTy)); - } else if (SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::stream)) { - addParam(FD, FieldTy, SYCLIntegrationHeader::kind_stream); - } else if (SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::work_group_memory)) { - addParam(FieldTy, 
SYCLIntegrationHeader::kind_work_group_memory, - offsetOf(FD, FieldTy)); - } else if (SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::sampler) || - SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::annotated_ptr) || - SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::annotated_arg)) { - CXXMethodDecl *InitMethod = getMethodByName(ClassTy, InitMethodName); - assert(InitMethod && "type must have __init method"); - const ParmVarDecl *InitArg = InitMethod->getParamDecl(0); - assert(InitArg && "Init method must have arguments"); - QualType T = InitArg->getType(); - SYCLIntegrationHeader::kernel_param_kind_t ParamKind = - SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::sampler) - ? SYCLIntegrationHeader::kind_sampler - : (T->isPointerType() ? SYCLIntegrationHeader::kind_pointer - : SYCLIntegrationHeader::kind_std_layout); - addParam(T, ParamKind, offsetOf(FD, FieldTy)); - } else { - llvm_unreachable( - "Unexpected SYCL special class when generating integration header"); - } - return true; - } - - bool handleSyclSpecialType(ParmVarDecl *, QualType) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool handlePointerType(FieldDecl *FD, QualType FieldTy) final { - addParam(FD, FieldTy, - ((StructDepth) ? SYCLIntegrationHeader::kind_std_layout - : SYCLIntegrationHeader::kind_pointer)); - return true; - } - - bool handlePointerType(ParmVarDecl *PD, QualType ParamTy) final { - addParam(PD, ParamTy, SYCLIntegrationHeader::kind_pointer); - return true; - } - - bool handleScalarType(FieldDecl *FD, QualType FieldTy) final { - addParam(FD, FieldTy, SYCLIntegrationHeader::kind_std_layout); - return true; - } - - bool handleScalarType(ParmVarDecl *PD, QualType ParamTy) final { - addParam(PD, ParamTy, SYCLIntegrationHeader::kind_std_layout); - return true; - } - - bool handleSimpleArrayType(FieldDecl *FD, QualType FieldTy) final { - // Arrays are always wrapped inside of structs, so just treat it as a simple - // struct. 
- addParam(FD, FieldTy, SYCLIntegrationHeader::kind_std_layout); - return true; - } - - bool handleTopLevelStruct(const CXXRecordDecl *, QualType Ty) final { - addParam(Ty, SYCLIntegrationHeader::kind_std_layout, /*Offset=*/0); - return true; - } - - bool handleNonDecompStruct(const CXXRecordDecl *, FieldDecl *FD, - QualType Ty) final { - addParam(FD, Ty, SYCLIntegrationHeader::kind_std_layout); - return true; - } - - bool handleNonDecompStruct(const CXXRecordDecl *, ParmVarDecl *PD, - QualType ParamTy) final { - addParam(PD, ParamTy, SYCLIntegrationHeader::kind_std_layout); - return true; - } - - bool handleNonDecompStruct(const CXXRecordDecl *Base, - const CXXBaseSpecifier &, QualType Ty) final { - addParam(Ty, SYCLIntegrationHeader::kind_std_layout, - offsetOf(Base, Ty->getAsCXXRecordDecl())); - return true; - } - - bool handleUnionType(FieldDecl *FD, QualType FieldTy) final { - return handleScalarType(FD, FieldTy); - } - - bool handleUnionType(ParmVarDecl *PD, QualType ParamTy) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - void handleSyclKernelHandlerType(QualType Ty) { - // The compiler generated kernel argument used to initialize SYCL 2020 - // specialization constants, `specialization_constants_buffer`, should - // have corresponding entry in integration header. - ASTContext &Context = SemaSYCLRef.getASTContext(); - // Offset is zero since kernel_handler argument is not part of - // kernel object (i.e. 
it is not captured) - addParam(Context.getPointerType(Context.CharTy), - SYCLIntegrationHeader::kind_specialization_constants_buffer, 0); - } - - bool enterStruct(const CXXRecordDecl *, FieldDecl *FD, QualType Ty) final { - ++StructDepth; - CurOffset += offsetOf(FD, Ty); - return true; - } - - bool enterStruct(const CXXRecordDecl *, ParmVarDecl *, QualType) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool leaveStruct(const CXXRecordDecl *, FieldDecl *FD, QualType Ty) final { - --StructDepth; - CurOffset -= offsetOf(FD, Ty); - return true; - } - - bool leaveStruct(const CXXRecordDecl *, ParmVarDecl *, QualType) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool enterStruct(const CXXRecordDecl *RD, const CXXBaseSpecifier &BS, - QualType) final { - CurOffset += offsetOf(RD, BS.getType()->getAsCXXRecordDecl()); - return true; - } - - bool leaveStruct(const CXXRecordDecl *RD, const CXXBaseSpecifier &BS, - QualType) final { - CurOffset -= offsetOf(RD, BS.getType()->getAsCXXRecordDecl()); - return true; - } - - bool enterArray(FieldDecl *FD, QualType ArrayTy, QualType) final { - ArrayBaseOffsets.push_back(CurOffset + offsetOf(FD, ArrayTy)); - return true; - } - - bool enterArray(ParmVarDecl *PD, QualType ArrayTy, QualType) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - bool nextElement(QualType ET, uint64_t Index) final { - int64_t Size = - SemaSYCLRef.getASTContext().getTypeSizeInChars(ET).getQuantity(); - CurOffset = ArrayBaseOffsets.back() + Size * Index; - return true; - } - - bool leaveArray(FieldDecl *FD, QualType ArrayTy, QualType) final { - CurOffset = ArrayBaseOffsets.pop_back_val(); - CurOffset -= offsetOf(FD, ArrayTy); - return true; - } - - bool leaveArray(ParmVarDecl *PD, QualType ArrayTy, QualType) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; - } - - using SyclKernelFieldHandler::enterStruct; - using 
SyclKernelFieldHandler::leaveStruct; -}; - -class SyclKernelIntFooterCreator : public SyclKernelFieldHandler { - SYCLIntegrationFooter &Footer; - -public: - SyclKernelIntFooterCreator(SemaSYCL &S, SYCLIntegrationFooter &F) - : SyclKernelFieldHandler(S), Footer(F) { - (void)Footer; // workaround for unused field warning - } -}; - -} // namespace - -class SYCLKernelNameTypeVisitor - : public TypeVisitor, - public ConstTemplateArgumentVisitor { - SemaSYCL &S; - SourceLocation KernelInvocationFuncLoc; - QualType KernelNameType; - using InnerTypeVisitor = TypeVisitor; - using InnerTemplArgVisitor = - ConstTemplateArgumentVisitor; - bool IsInvalid = false; - bool IsUnnamedKernel = false; - - void VisitTemplateArgs(ArrayRef Args) { - for (auto &A : Args) - Visit(A); - } - -public: - SYCLKernelNameTypeVisitor(SemaSYCL &S, SourceLocation KernelInvocationFuncLoc, - QualType KernelNameType, bool IsUnnamedKernel) - : S(S), KernelInvocationFuncLoc(KernelInvocationFuncLoc), - KernelNameType(KernelNameType), IsUnnamedKernel(IsUnnamedKernel) {} - - bool isValid() { return !IsInvalid; } - - void Visit(QualType T) { - if (T.isNull()) - return; - - const CXXRecordDecl *RD = T->getAsCXXRecordDecl(); - // If KernelNameType has template args visit each template arg via - // ConstTemplateArgumentVisitor - if (const auto *TSD = - dyn_cast_or_null(RD)) { - ArrayRef Args = TSD->getTemplateArgs().asArray(); - - VisitTemplateArgs(Args); - } else { - InnerTypeVisitor::Visit(T.getTypePtr()); - } - } - - void Visit(const TemplateArgument &TA) { - if (TA.isNull()) - return; - InnerTemplArgVisitor::Visit(TA); - } - - void VisitBuiltinType(const BuiltinType *TT) { - if (TT->isNullPtrType()) { - S.Diag(KernelInvocationFuncLoc, diag::err_nullptr_t_type_in_sycl_kernel) - << KernelNameType; - - IsInvalid = true; - } - return; - } - - void VisitTagType(const TagType *TT) { - return DiagnoseKernelNameType(TT->getDecl()); - } - - void DiagnoseKernelNameType(const NamedDecl *DeclNamed) { - if 
(!IsUnnamedKernel) { - NotForwardDeclarableReason NFDR = isForwardDeclarable(DeclNamed, S); - switch (NFDR) { - case NotForwardDeclarableReason::UnscopedEnum: - S.Diag(KernelInvocationFuncLoc, diag::err_sycl_kernel_incorrectly_named) - << /* unscoped enum requires fixed underlying type */ 1 - << DeclNamed; - IsInvalid = true; - return; - case NotForwardDeclarableReason::StdNamespace: - S.Diag(KernelInvocationFuncLoc, - diag::err_invalid_std_type_in_sycl_kernel) - << KernelNameType << DeclNamed; - IsInvalid = true; - return; - case NotForwardDeclarableReason::UnnamedTag: - S.Diag(KernelInvocationFuncLoc, diag::err_sycl_kernel_incorrectly_named) - << /* unnamed type is invalid */ 2 << KernelNameType; - IsInvalid = true; - return; - case NotForwardDeclarableReason::NotAtNamespaceScope: - S.Diag(KernelInvocationFuncLoc, diag::err_sycl_kernel_incorrectly_named) - << /* kernel name should be forward declarable at namespace - scope */ - 0 << KernelNameType; - IsInvalid = true; - return; - case NotForwardDeclarableReason::None: - default: - // Do nothing, we're fine. 
- break; - } - } - } - - void VisitTypeTemplateArgument(const TemplateArgument &TA) { - QualType T = TA.getAsType(); - if (const auto *ET = T->getAs()) - VisitTagType(ET); - else - Visit(T); - } - - void VisitIntegralTemplateArgument(const TemplateArgument &TA) { - QualType T = TA.getIntegralType(); - if (const EnumType *ET = T->getAs()) - VisitTagType(ET); - } - - void VisitTemplateTemplateArgument(const TemplateArgument &TA) { - TemplateDecl *TD = TA.getAsTemplate().getAsTemplateDecl(); - assert(TD && "template declaration must be available"); - TemplateParameterList *TemplateParams = TD->getTemplateParameters(); - for (NamedDecl *P : *TemplateParams) { - if (NonTypeTemplateParmDecl *TemplateParam = - dyn_cast(P)) - if (const EnumType *ET = TemplateParam->getType()->getAs()) - VisitTagType(ET); - } - } - - void VisitPackTemplateArgument(const TemplateArgument &TA) { - VisitTemplateArgs(TA.getPackAsArray()); - } -}; - -void SemaSYCL::CheckSYCLKernelCall(FunctionDecl *KernelFunc, - ArrayRef Args) { - QualType KernelNameType = - calculateKernelNameType(getASTContext(), KernelFunc); - SYCLKernelNameTypeVisitor KernelNameTypeVisitor( - *this, Args[0]->getExprLoc(), KernelNameType, - IsSYCLUnnamedKernel(*this, KernelFunc)); - KernelNameTypeVisitor.Visit(KernelNameType.getCanonicalType()); - - // FIXME: In place until the library works around its 'host' invocation - // issues. 
- if (!SemaRef.LangOpts.SYCLIsDevice) - return; - - const CXXRecordDecl *KernelObj = - GetSYCLKernelObjectType(KernelFunc)->getAsCXXRecordDecl(); - - if (!KernelObj || (KernelObj && !KernelObj->hasDefinition())) { - Diag(Args[0]->getExprLoc(), diag::err_sycl_kernel_not_function_object); - KernelFunc->setInvalidDecl(); - return; - } - - if (KernelObj->isLambda()) { - for (const LambdaCapture &LC : KernelObj->captures()) - if (LC.capturesThis() && LC.isImplicit()) { - Diag(LC.getLocation(), diag::err_implicit_this_capture); - KernelFunc->setInvalidDecl(); - } - } - - // check that calling kernel conforms to spec - QualType KernelParamTy = KernelFunc->getParamDecl(0)->getType(); - if (not KernelParamTy->isReferenceType()) { - // passing by value. emit warning if using SYCL 2020 or greater - if (SemaRef.LangOpts.getSYCLVersion() >= LangOptions::SYCL_2020) - Diag(KernelFunc->getLocation(), diag::warn_sycl_pass_by_value_deprecated); - } - - // Do not visit invalid kernel object. - if (KernelObj->isInvalidDecl()) - return; - - SyclKernelDecompMarker DecompMarker(*this); - SyclKernelFieldChecker FieldChecker(*this); - SyclKernelUnionChecker UnionChecker(*this); - - KernelObjVisitor Visitor{*this}; - - DiagnosingSYCLKernel = true; - - // Emit diagnostics for SYCL device kernels only - Visitor.VisitRecordBases(KernelObj, FieldChecker, UnionChecker, DecompMarker); - Visitor.VisitRecordFields(KernelObj, FieldChecker, UnionChecker, - DecompMarker); - Visitor.visitTopLevelRecord(KernelObj, GetSYCLKernelObjectType(KernelFunc), - FieldChecker, UnionChecker, DecompMarker); - - DiagnosingSYCLKernel = false; - // Set the kernel function as invalid, if any of the checkers fail validation. - if (!FieldChecker.isValid() || !UnionChecker.isValid() || - !KernelNameTypeVisitor.isValid()) - KernelFunc->setInvalidDecl(); -} - -// For a wrapped parallel_for, copy attributes from original -// kernel to wrapped kernel. 
-void SemaSYCL::copySYCLKernelAttrs(CXXMethodDecl *CallOperator) { - // Get the operator() function of the wrapper. - assert(CallOperator && "invalid kernel object"); - - typedef std::pair ChildParentPair; - llvm::SmallPtrSet Visited; - llvm::SmallVector WorkList; - WorkList.push_back({CallOperator, nullptr}); - FunctionDecl *KernelBody = nullptr; - - CallGraph SYCLCG; - SYCLCG.addToCallGraph(CallOperator); - while (!WorkList.empty()) { - FunctionDecl *FD = WorkList.back().first; - FunctionDecl *ParentFD = WorkList.back().second; - - if ((ParentFD == CallOperator) && isSYCLKernelBodyFunction(FD)) { - KernelBody = FD; - break; - } - - WorkList.pop_back(); - if (!Visited.insert(FD).second) - continue; // We've already seen this Decl - - CallGraphNode *N = SYCLCG.getNode(FD); - if (!N) - continue; - - for (const CallGraphNode *CI : *N) { - if (auto *Callee = dyn_cast(CI->getDecl())) { - Callee = Callee->getMostRecentDecl(); - if (!Visited.count(Callee)) - WorkList.push_back({Callee, FD}); - } - } - } - - assert(KernelBody && "improper parallel_for wrap"); - if (KernelBody) { - llvm::SmallVector Attrs; - collectSYCLAttributes(*this, KernelBody, Attrs, /*DirectlyCalled*/ true); - if (!Attrs.empty()) - llvm::for_each(Attrs, - [CallOperator](Attr *A) { CallOperator->addAttr(A); }); - } -} - -void SemaSYCL::SetSYCLKernelNames() { - std::unique_ptr MangleCtx( - getASTContext().createMangleContext()); - // We assume the list of KernelDescs is the complete list of kernels needing - // to be rewritten. - for (const std::pair &Pair : - SyclKernelsToOpenCLKernels) { - std::string CalculatedName, StableName; - StringRef KernelName; - if (isFreeFunction(*this, Pair.first)) { - std::tie(CalculatedName, StableName) = - constructFreeFunctionKernelName(*this, Pair.first, *MangleCtx); - KernelName = CalculatedName; - } else { - std::tie(CalculatedName, StableName) = - constructKernelName(*this, Pair.first, *MangleCtx); - KernelName = - IsSYCLUnnamedKernel(*this, Pair.first) ? 
StableName : CalculatedName; - } - - getSyclIntegrationHeader().updateKernelNames(Pair.first, KernelName, - StableName); - - // Set name of generated kernel. - Pair.second->setDeclName(&getASTContext().Idents.get(KernelName)); - // Update the AsmLabel for this generated kernel. - Pair.second->addAttr( - AsmLabelAttr::CreateImplicit(getASTContext(), KernelName)); - } -} - -// Generates the OpenCL kernel using KernelCallerFunc (kernel caller -// function) defined is SYCL headers. -// Generated OpenCL kernel contains the body of the kernel caller function, -// receives OpenCL like parameters and additionally does some manipulation to -// initialize captured lambda/functor fields with these parameters. -// SYCL runtime marks kernel caller function with sycl_kernel attribute. -// To be able to generate OpenCL kernel from KernelCallerFunc we put -// the following requirements to the function which SYCL runtime can mark with -// sycl_kernel attribute: -// - Must be template function with at least two template parameters. -// First parameter must represent "unique kernel name" -// Second parameter must be the function object type -// - Must have only one function parameter - function object. -// -// Example of kernel caller function: -// template -// __attribute__((sycl_kernel)) void kernel_caller_function(KernelType -// KernelFuncObj) { -// KernelFuncObj(); -// } -// -// -void SemaSYCL::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, - MangleContext &MC) { - // The first argument to the KernelCallerFunc is the lambda object. - QualType KernelObjTy = GetSYCLKernelObjectType(KernelCallerFunc); - const CXXRecordDecl *KernelObj = KernelObjTy->getAsCXXRecordDecl(); - assert(KernelObj && "invalid kernel caller"); - - // Do not visit invalid kernel object. 
- if (KernelObj->isInvalidDecl()) - return; - - KernelCallOperatorVisitor KernelCallOperator(KernelCallerFunc, KernelObj); - CXXMethodDecl *CallOperator = nullptr; - - if (KernelObj->isLambda()) - CallOperator = KernelObj->getLambdaCallOperator(); - else - CallOperator = KernelCallOperator.getCallOperator(); - - { - // Do enough to calculate the StableName for the purposes of the hackery - // below for __pf_kernel_wrapper. Placed in a scope so that we don't - // accidentially use these values below, before the names are stabililzed. - std::string CalculatedName, StableName; - std::tie(CalculatedName, StableName) = - constructKernelName(*this, KernelCallerFunc, MC); - - // Attributes of a user-written SYCL kernel must be copied to the internally - // generated alternative kernel, identified by a known string in its name. - if (StableName.find("__pf_kernel_wrapper") != std::string::npos) - copySYCLKernelAttrs(CallOperator); - } - - bool IsSIMDKernel = isESIMDKernelType(CallOperator); - - SyclKernelArgsSizeChecker argsSizeChecker(*this, KernelObj->getLocation(), - IsSIMDKernel); - ESIMDKernelDiagnostics esimdKernel(*this, KernelObj->getLocation(), - IsSIMDKernel); - - SyclKernelDeclCreator kernel_decl(*this, KernelObj->getLocation(), - KernelCallerFunc->isInlined(), IsSIMDKernel, - KernelCallerFunc); - SyclKernelBodyCreator kernel_body(*this, kernel_decl, KernelObj, - KernelCallerFunc, IsSIMDKernel, - CallOperator); - SyclKernelIntHeaderCreator int_header( - IsSIMDKernel, *this, getSyclIntegrationHeader(), KernelObj, - calculateKernelNameType(getASTContext(), KernelCallerFunc), - KernelCallerFunc); - - SyclKernelIntFooterCreator int_footer(*this, getSyclIntegrationFooter()); - SyclOptReportCreator opt_report(*this, kernel_decl, KernelObj->getLocation()); - - KernelObjVisitor Visitor{*this}; - - // Visit handlers to generate information for optimization record only if - // optimization record is saved. 
- if (!getLangOpts().OptRecordFile.empty()) { - Visitor.VisitKernelRecord(KernelObj, KernelObjTy, argsSizeChecker, - esimdKernel, kernel_decl, kernel_body, int_header, - int_footer, opt_report); - } else { - Visitor.VisitKernelRecord(KernelObj, KernelObjTy, argsSizeChecker, - esimdKernel, kernel_decl, kernel_body, int_header, - int_footer); - } - - if (ParmVarDecl *KernelHandlerArg = - getSyclKernelHandlerArg(KernelCallerFunc)) { - kernel_decl.handleSyclKernelHandlerType(); - kernel_body.handleSyclKernelHandlerType(KernelHandlerArg); - int_header.handleSyclKernelHandlerType(KernelHandlerArg->getType()); - - if (!getLangOpts().OptRecordFile.empty()) - opt_report.handleSyclKernelHandlerType(); - } -} - -void ConstructFreeFunctionKernel(SemaSYCL &SemaSYCLRef, FunctionDecl *FD) { - SyclKernelArgsSizeChecker argsSizeChecker(SemaSYCLRef, FD->getLocation(), - false /*IsSIMDKernel*/); - SyclKernelDeclCreator kernel_decl(SemaSYCLRef, FD->getLocation(), - FD->isInlined(), false /*IsSIMDKernel */, - FD); - - FreeFunctionKernelBodyCreator kernel_body(SemaSYCLRef, kernel_decl, FD); - - SyclKernelIntHeaderCreator int_header( - SemaSYCLRef, SemaSYCLRef.getSyclIntegrationHeader(), FD->getType(), FD); - - SyclKernelIntFooterCreator int_footer(SemaSYCLRef, - SemaSYCLRef.getSyclIntegrationFooter()); - KernelObjVisitor Visitor{SemaSYCLRef}; - - Visitor.VisitFunctionParameters(FD, argsSizeChecker, kernel_decl, kernel_body, - int_header, int_footer); -} - -// Figure out the sub-group for the this function. First we check the -// attributes, then the global settings. 
-static std::pair -CalcEffectiveSubGroup(ASTContext &Ctx, const LangOptions &LO, - const FunctionDecl *FD) { - if (const auto *A = FD->getAttr()) { - int64_t Val = getIntExprValue(A->getValue(), Ctx); - return {LangOptions::SubGroupSizeType::Integer, Val}; - } - - if (const auto *A = FD->getAttr()) { - if (A->getType() == IntelNamedSubGroupSizeAttr::Primary) - return {LangOptions::SubGroupSizeType::Primary, 0}; - return {LangOptions::SubGroupSizeType::Auto, 0}; - } - - // Return the global settings. - return {LO.getDefaultSubGroupSizeType(), - static_cast(LO.DefaultSubGroupSize)}; -} - -static SourceLocation GetSubGroupLoc(const FunctionDecl *FD) { - if (const auto *A = FD->getAttr()) - return A->getLocation(); - if (const auto *A = FD->getAttr()) - return A->getLocation(); - return SourceLocation{}; -} - -static void CheckSYCL2020SubGroupSizes(SemaSYCL &S, FunctionDecl *SYCLKernel, - const FunctionDecl *FD) { - // If they are the same, no error. - if (CalcEffectiveSubGroup(S.getASTContext(), S.getLangOpts(), SYCLKernel) == - CalcEffectiveSubGroup(S.getASTContext(), S.getLangOpts(), FD)) - return; - - // No need to validate __spirv routines here since they - // are mapped to the equivalent SPIRV operations. - const IdentifierInfo *II = FD->getIdentifier(); - if (II && II->getName().starts_with("__spirv_")) - return; - - // Else we need to figure out why they don't match. - SourceLocation FDAttrLoc = GetSubGroupLoc(FD); - SourceLocation KernelAttrLoc = GetSubGroupLoc(SYCLKernel); - - if (FDAttrLoc.isValid()) { - // This side was caused by an attribute. - S.Diag(FDAttrLoc, diag::err_sycl_mismatch_group_size) - << /*kernel called*/ 0; - - if (KernelAttrLoc.isValid()) { - S.Diag(KernelAttrLoc, diag::note_conflicting_attribute); - } else { - // Kernel is 'default'. 
- S.Diag(SYCLKernel->getLocation(), diag::note_sycl_kernel_declared_here); - } - return; - } - - // Else this doesn't have an attribute, which can only be caused by this being - // an undefined SYCL_EXTERNAL, and the kernel has an attribute that conflicts. - if (const auto *A = SYCLKernel->getAttr()) { - // Don't diagnose this if the kernel got its size from the 'old' attribute - // spelling. - if (!A->isSYCL2020Spelling()) - return; - } - - assert(KernelAttrLoc.isValid() && "Kernel doesn't have attribute either?"); - S.Diag(FD->getLocation(), diag::err_sycl_mismatch_group_size) - << /*undefined SYCL_EXTERNAL*/ 1; - S.Diag(KernelAttrLoc, diag::note_conflicting_attribute); -} - -// Check SYCL2020 Attributes. 2020 attributes don't propogate, they are only -// valid if they match the attribute on the kernel. Note that this is a slight -// difference from what the spec says, which says these attributes are only -// valid on SYCL Kernels and SYCL_EXTERNAL, but we felt that for -// self-documentation purposes that it would be nice to be able to repeat these -// on subsequent functions. -static void CheckSYCL2020Attributes( - SemaSYCL &S, FunctionDecl *SYCLKernel, FunctionDecl *KernelBody, - const llvm::SmallPtrSetImpl &CalledFuncs) { - - if (KernelBody) { - // Make sure the kernel itself has all the 2020 attributes, since we don't - // do propagation of these. - if (auto *A = KernelBody->getAttr()) - if (A->isSYCL2020Spelling()) - SYCLKernel->addAttr(A); - if (auto *A = KernelBody->getAttr()) - SYCLKernel->addAttr(A); - - // If the kernel has a body, we should get the attributes for the kernel - // from there instead, so that we get the functor object. - SYCLKernel = KernelBody; - } - - for (auto *FD : CalledFuncs) { - if (FD == SYCLKernel || FD == KernelBody) - continue; - for (auto *Attr : FD->attrs()) { - switch (Attr->getKind()) { - case attr::Kind::IntelReqdSubGroupSize: - // Pre SYCL2020 spellings handled during collection. 
- if (!cast(Attr)->isSYCL2020Spelling()) - break; - LLVM_FALLTHROUGH; - case attr::Kind::IntelNamedSubGroupSize: - CheckSYCL2020SubGroupSizes(S, SYCLKernel, FD); - break; - case attr::Kind::SYCLDevice: - // If a SYCL_EXTERNAL function is not defined in this TU, its necessary - // that it has a compatible sub-group-size. Don't diagnose if it has a - // sub-group attribute, we can count on the other checks to catch this. - if (!FD->isDefined() && !FD->hasAttr() && - !FD->hasAttr()) - CheckSYCL2020SubGroupSizes(S, SYCLKernel, FD); - break; - default: - break; - } - } - } -} - -static void PropagateAndDiagnoseDeviceAttr( - SemaSYCL &S, const SingleDeviceFunctionTracker &Tracker, Attr *A, - FunctionDecl *SYCLKernel, FunctionDecl *KernelBody) { - switch (A->getKind()) { - case attr::Kind::IntelReqdSubGroupSize: { - auto *Attr = cast(A); - - if (Attr->isSYCL2020Spelling()) - break; - const auto *KBSimdAttr = - KernelBody ? KernelBody->getAttr() : nullptr; - if (auto *Existing = SYCLKernel->getAttr()) { - if (getIntExprValue(Existing->getValue(), S.getASTContext()) != - getIntExprValue(Attr->getValue(), S.getASTContext())) { - S.Diag(SYCLKernel->getLocation(), - diag::err_conflicting_sycl_kernel_attributes); - S.Diag(Existing->getLocation(), diag::note_conflicting_attribute); - S.Diag(Attr->getLocation(), diag::note_conflicting_attribute); - SYCLKernel->setInvalidDecl(); - } - } else if (KBSimdAttr && - (getIntExprValue(Attr->getValue(), S.getASTContext()) != 1)) { - reportConflictingAttrs(S, KernelBody, KBSimdAttr, Attr); - } else { - SYCLKernel->addAttr(A); - } - break; - } - case attr::Kind::SYCLReqdWorkGroupSize: { - auto *RWGSA = cast(A); - if (auto *Existing = SYCLKernel->getAttr()) { - if (S.anyWorkGroupSizesDiffer(Existing->getXDim(), Existing->getYDim(), - Existing->getZDim(), RWGSA->getXDim(), - RWGSA->getYDim(), RWGSA->getZDim())) { - S.Diag(SYCLKernel->getLocation(), - diag::err_conflicting_sycl_kernel_attributes); - S.Diag(Existing->getLocation(), 
diag::note_conflicting_attribute); - S.Diag(RWGSA->getLocation(), diag::note_conflicting_attribute); - SYCLKernel->setInvalidDecl(); - } - } else if (auto *Existing = - SYCLKernel->getAttr()) { - if (S.checkMaxAllowedWorkGroupSize( - RWGSA->getXDim(), RWGSA->getYDim(), RWGSA->getZDim(), - Existing->getXDim(), Existing->getYDim(), Existing->getZDim())) { - S.Diag(SYCLKernel->getLocation(), - diag::err_conflicting_sycl_kernel_attributes); - S.Diag(Existing->getLocation(), diag::note_conflicting_attribute); - S.Diag(RWGSA->getLocation(), diag::note_conflicting_attribute); - SYCLKernel->setInvalidDecl(); - } else { - SYCLKernel->addAttr(A); - } - } else { - SYCLKernel->addAttr(A); - } - break; - } - case attr::Kind::SYCLWorkGroupSizeHint: { - auto *WGSH = cast(A); - if (auto *Existing = SYCLKernel->getAttr()) { - if (S.anyWorkGroupSizesDiffer(Existing->getXDim(), Existing->getYDim(), - Existing->getZDim(), WGSH->getXDim(), - WGSH->getYDim(), WGSH->getZDim())) { - S.Diag(SYCLKernel->getLocation(), - diag::err_conflicting_sycl_kernel_attributes); - S.Diag(Existing->getLocation(), diag::note_conflicting_attribute); - S.Diag(WGSH->getLocation(), diag::note_conflicting_attribute); - SYCLKernel->setInvalidDecl(); - } - } - SYCLKernel->addAttr(A); - break; - } - case attr::Kind::SYCLIntelMaxWorkGroupSize: { - auto *SIMWGSA = cast(A); - if (auto *Existing = SYCLKernel->getAttr()) { - if (S.checkMaxAllowedWorkGroupSize( - Existing->getXDim(), Existing->getYDim(), Existing->getZDim(), - SIMWGSA->getXDim(), SIMWGSA->getYDim(), SIMWGSA->getZDim())) { - S.Diag(SYCLKernel->getLocation(), - diag::err_conflicting_sycl_kernel_attributes); - S.Diag(Existing->getLocation(), diag::note_conflicting_attribute); - S.Diag(SIMWGSA->getLocation(), diag::note_conflicting_attribute); - SYCLKernel->setInvalidDecl(); - } else { - SYCLKernel->addAttr(A); - } - } else { - SYCLKernel->addAttr(A); - } - break; - } - case attr::Kind::SYCLSimd: - if (KernelBody && !KernelBody->getAttr()) { - // Usual 
kernel can't call ESIMD functions. - S.Diag(KernelBody->getLocation(), - diag::err_sycl_function_attribute_mismatch) - << A; - S.Diag(A->getLocation(), diag::note_attribute); - KernelBody->setInvalidDecl(); - break; - } - LLVM_FALLTHROUGH; - case attr::Kind::SYCLIntelKernelArgsRestrict: - case attr::Kind::SYCLIntelNumSimdWorkItems: - case attr::Kind::SYCLIntelSchedulerTargetFmaxMhz: - case attr::Kind::SYCLIntelMaxGlobalWorkDim: - case attr::Kind::SYCLIntelMinWorkGroupsPerComputeUnit: - case attr::Kind::SYCLIntelMaxWorkGroupsPerMultiprocessor: - case attr::Kind::SYCLIntelNoGlobalWorkOffset: - case attr::Kind::SYCLIntelLoopFuse: - case attr::Kind::SYCLIntelMaxConcurrency: - case attr::Kind::SYCLIntelDisableLoopPipelining: - case attr::Kind::SYCLIntelInitiationInterval: - case attr::Kind::SYCLIntelUseStallEnableClusters: - case attr::Kind::SYCLDeviceHas: - case attr::Kind::SYCLAddIRAttributesFunction: - SYCLKernel->addAttr(A); - break; - case attr::Kind::IntelNamedSubGroupSize: - // Nothing to do here, handled in the SYCL2020 spelling. - break; - // TODO: vec_len_hint should be handled here - default: - // Seeing this means that CollectPossibleKernelAttributes was - // updated while this switch wasn't...or something went wrong - llvm_unreachable("Unexpected attribute was collected by " - "CollectPossibleKernelAttributes"); - } -} - -void SemaSYCL::MarkDevices() { - // This Tracker object ensures that the SyclDeviceDecls collection includes - // the SYCL_EXTERNAL functions, and manages the diagnostics for all of the - // functions in the kernel. - DeviceFunctionTracker Tracker(*this); - - for (Decl *D : syclDeviceDecls()) { - auto *SYCLKernel = cast(D); - - // This type does the actual analysis on a per-kernel basis. It does this to - // make sure that we're only ever dealing with the context of a single - // kernel at a time. 
- SingleDeviceFunctionTracker T{Tracker, SYCLKernel}; - - CheckSYCL2020Attributes(*this, T.GetSYCLKernel(), T.GetKernelBody(), - T.GetDeviceFunctions()); - for (auto *A : T.GetCollectedAttributes()) - PropagateAndDiagnoseDeviceAttr(*this, T, A, T.GetSYCLKernel(), - T.GetKernelBody()); - checkSYCLAddIRAttributesFunctionAttrConflicts(T.GetSYCLKernel()); - } -} - -void SemaSYCL::ProcessFreeFunction(FunctionDecl *FD) { - if (isFreeFunction(*this, FD)) { - SyclKernelDecompMarker DecompMarker(*this); - SyclKernelFieldChecker FieldChecker(*this); - SyclKernelUnionChecker UnionChecker(*this); - - KernelObjVisitor Visitor{*this}; - - DiagnosingSYCLKernel = true; - - // Check parameters of free function. - Visitor.VisitFunctionParameters(FD, DecompMarker, FieldChecker, - UnionChecker); - - DiagnosingSYCLKernel = false; - - // Ignore the free function if any of the checkers fail validation. - if (!FieldChecker.isValid() || !UnionChecker.isValid()) - return; - - ConstructFreeFunctionKernel(*this, FD); - } -} - -// ----------------------------------------------------------------------------- -// SYCL device specific diagnostics implementation -// ----------------------------------------------------------------------------- - -Sema::SemaDiagnosticBuilder -SemaSYCL::DiagIfDeviceCode(SourceLocation Loc, unsigned DiagID, - DeviceDiagnosticReason Reason) { - assert(getLangOpts().SYCLIsDevice && - "Should only be called during SYCL compilation"); - FunctionDecl *FD = dyn_cast(SemaRef.getCurLexicalContext()); - SemaDiagnosticBuilder::Kind DiagKind = [this, FD, Reason] { - if (DiagnosingSYCLKernel) - return SemaDiagnosticBuilder::K_ImmediateWithCallStack; - if (!FD) - return SemaDiagnosticBuilder::K_Nop; - if (SemaRef.isConstantEvaluatedContext() || - SemaRef.currentEvaluationContext().isDiscardedStatementContext()) - return SemaDiagnosticBuilder::K_Nop; - // Defer until we know that the variable's intializer is actually a - // manifestly constant-evaluated expression. 
- if (SemaRef.InConstexprVarInit) - return SemaDiagnosticBuilder::K_Deferred; - if (SemaRef.getEmissionStatus(FD) == - Sema::FunctionEmissionStatus::Emitted) { - // Skip the diagnostic if we know it won't be emitted. - if ((SemaRef.getEmissionReason(FD) & Reason) == - Sema::DeviceDiagnosticReason::None) - return SemaDiagnosticBuilder::K_Nop; - - return SemaDiagnosticBuilder::K_ImmediateWithCallStack; - } - return SemaDiagnosticBuilder::K_Deferred; - }(); - return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, FD, SemaRef, Reason); -} - -void SemaSYCL::deepTypeCheckForDevice(SourceLocation UsedAt, - llvm::DenseSet Visited, - ValueDecl *DeclToCheck) { - assert(getLangOpts().SYCLIsDevice && - "Should only be called during SYCL compilation"); - // Emit notes only for the first discovered declaration of unsupported type - // to avoid mess of notes. This flag is to track that error already happened. - bool NeedToEmitNotes = true; - - auto Check = [&](QualType TypeToCheck, const ValueDecl *D) { - bool ErrorFound = false; - if (isZeroSizedArray(*this, TypeToCheck)) { - DiagIfDeviceCode(UsedAt, diag::err_typecheck_zero_array_size) << 1; - ErrorFound = true; - } - // Checks for other types can also be done here. - if (ErrorFound) { - if (NeedToEmitNotes) { - if (auto *FD = dyn_cast(D)) - DiagIfDeviceCode(FD->getLocation(), - diag::note_illegal_field_declared_here) - << FD->getType()->isPointerType() << FD->getType(); - else - DiagIfDeviceCode(D->getLocation(), diag::note_declared_at); - } - } - - return ErrorFound; - }; - - // In case we have a Record used do the DFS for a bad field. - SmallVector StackForRecursion; - StackForRecursion.push_back(DeclToCheck); - - // While doing DFS save how we get there to emit a nice set of notes. - SmallVector History; - History.push_back(nullptr); - - do { - const ValueDecl *Next = StackForRecursion.pop_back_val(); - if (!Next) { - assert(!History.empty()); - // Found a marker, we have gone up a level. 
- History.pop_back(); - continue; - } - QualType NextTy = Next->getType(); - - if (!Visited.insert(NextTy).second) - continue; - - auto EmitHistory = [&]() { - // The first element is always nullptr. - for (uint64_t Index = 1; Index < History.size(); ++Index) { - DiagIfDeviceCode(History[Index]->getLocation(), - diag::note_within_field_of_type) - << History[Index]->getType(); - } - }; - - if (Check(NextTy, Next)) { - if (NeedToEmitNotes) - EmitHistory(); - NeedToEmitNotes = false; - } - - // In case pointer/array/reference type is met get pointee type, then - // proceed with that type. - while (NextTy->isAnyPointerType() || NextTy->isArrayType() || - NextTy->isReferenceType()) { - if (NextTy->isArrayType()) - NextTy = QualType{NextTy->getArrayElementTypeNoTypeQual(), 0}; - else - NextTy = NextTy->getPointeeType(); - if (Check(NextTy, Next)) { - if (NeedToEmitNotes) - EmitHistory(); - NeedToEmitNotes = false; - } - } - - if (const auto *RecDecl = NextTy->getAsRecordDecl()) { - if (auto *NextFD = dyn_cast(Next)) - History.push_back(NextFD); - // When nullptr is discovered, this means we've gone back up a level, so - // the history should be cleaned. - StackForRecursion.push_back(nullptr); - llvm::copy(RecDecl->fields(), std::back_inserter(StackForRecursion)); - } - } while (!StackForRecursion.empty()); -} - -void SemaSYCL::finalizeSYCLDelayedAnalysis(const FunctionDecl *Caller, - const FunctionDecl *Callee, - SourceLocation Loc, - DeviceDiagnosticReason Reason) { - Callee = Callee->getMostRecentDecl(); - - // If the reason for the emission of this diagnostic is not SYCL-specific, - // and it is not known to be reachable from a routine on device, do not - // issue a diagnostic. - if ((Reason & DeviceDiagnosticReason::Sycl) == DeviceDiagnosticReason::None && - !isFDReachableFromSyclDevice(Callee, Caller)) - return; - - // If Callee has a SYCL attribute, no diagnostic needed. 
- if (Callee->hasAttr() || Callee->hasAttr()) - return; - - // If Callee has a CUDA device attribute, no diagnostic needed. - if (getLangOpts().CUDA && Callee->hasAttr()) - return; - - // Diagnose if this is an undefined function and it is not a builtin. - // Currently, there is an exception of "__failed_assertion" in libstdc++-11, - // this undefined function is used to trigger a compiling error. - if (!Callee->isDefined() && !Callee->getBuiltinID() && - !Callee->isReplaceableGlobalAllocationFunction() && - !isSYCLUndefinedAllowed(Callee, SemaRef.getSourceManager())) { - Diag(Loc, diag::err_sycl_restrict) << SemaSYCL::KernelCallUndefinedFunction; - Diag(Callee->getLocation(), diag::note_previous_decl) << Callee; - Diag(Caller->getLocation(), diag::note_called_by) << Caller; - } -} - -bool SemaSYCL::checkAllowedSYCLInitializer(VarDecl *VD) { - assert(getLangOpts().SYCLIsDevice && - "Should only be called during SYCL compilation"); - - if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage()) - return true; - - const Expr *Init = VD->getInit(); - bool ValueDependent = Init && Init->isValueDependent(); - bool isConstantInit = Init && !ValueDependent && - Init->isConstantInitializer(getASTContext(), false); - if (!VD->isConstexpr() && Init && !ValueDependent && !isConstantInit) - return false; - - return true; -} - -// ----------------------------------------------------------------------------- -// Integration header functionality implementation -// ----------------------------------------------------------------------------- - -/// Returns a string ID of given parameter kind - used in header -/// emission. 
-static const char *paramKind2Str(KernelParamKind K) { -#define CASE(x) \ - case SYCLIntegrationHeader::kind_##x: \ - return "kind_" #x - switch (K) { - CASE(accessor); - CASE(std_layout); - CASE(sampler); - CASE(stream); - CASE(specialization_constants_buffer); - CASE(pointer); - CASE(work_group_memory); - } - return ""; - -#undef CASE -} - -// Emits forward declarations of classes and template classes on which -// declaration of given type depends. -// For example, consider SimpleVadd -// class specialization in parallel_for below: -// -// template -// class SimpleVadd; -// ... -// template -// void simple_vadd(const std::array& VA, const std::array& -// VB, -// std::array& VC, int param, T2 ... varargs) { -// ... -// deviceQueue.submit([&](sycl::handler& cgh) { -// ... -// cgh.parallel_for>(...) -// ... -// } -// ... -// } -// ... -// class MyClass {...}; -// template class MyInnerTmplClass { ... } -// template class MyTmplClass { ... } -// ... -// MyClass *c = new MyClass(); -// MyInnerTmplClass c1(&c); -// simple_vadd(A, B, C, 5, 'a', 1.f, -// new MyTmplClass>(c1)); -// -// it will generate the following forward declarations: -// class MyClass; -// template class MyInnerTmplClass; -// template class MyTmplClass; -// template class SimpleVadd; -// -class SYCLFwdDeclEmitter - : public TypeVisitor, - public ConstTemplateArgumentVisitor { - using InnerTypeVisitor = TypeVisitor; - using InnerTemplArgVisitor = ConstTemplateArgumentVisitor; - raw_ostream &OS; - llvm::SmallPtrSet Printed; - PrintingPolicy Policy; - - void printForwardDecl(NamedDecl *D) { - // wrap the declaration into namespaces if needed - unsigned NamespaceCnt = 0; - std::string NSStr = ""; - const DeclContext *DC = D->getDeclContext(); - - while (DC) { - if (const auto *NS = dyn_cast(DC)) { - ++NamespaceCnt; - StringRef NSInlinePrefix = NS->isInline() ? 
"inline " : ""; - NSStr.insert( - 0, - Twine(NSInlinePrefix + "namespace " + NS->getName() + " { ").str()); - DC = NS->getDeclContext(); - } else { - // We should be able to handle a subset of the decl-context types to - // make our namespaces for forward declarations as specific as possible, - // so just skip them here. We can't use their names, since they would - // not be forward declarable, but we can try to make them as specific as - // possible. - // This permits things such as: - // namespace N1 { void foo() { kernel(...); }} - // and - // namespace N2 { void foo() { kernel(...); }} - // to co-exist, despite technically being against the SYCL rules. - // See SYCLKernelNameTypePrinter for the corresponding part that prints - // the kernel information for this type. These two must match. - if (isa(DC)) { - DC = cast(DC)->getDeclContext(); - } else { - break; - } - } - } - OS << NSStr; - if (NamespaceCnt > 0) - OS << "\n"; - - D->print(OS, Policy); - - if (const auto *ED = dyn_cast(D)) { - QualType T = ED->getIntegerType().getCanonicalType(); - // Backup since getIntegerType() returns null for enum forward - // declaration with no fixed underlying type - if (T.isNull()) - T = ED->getPromotionType(); - OS << " : " << T.getAsString(); - } - - OS << ";\n"; - - // print closing braces for namespaces if needed - for (unsigned I = 0; I < NamespaceCnt; ++I) - OS << "}"; - if (NamespaceCnt > 0) - OS << "\n"; - } - - // Checks if we've already printed forward declaration and prints it if not. 
- void checkAndEmitForwardDecl(NamedDecl *D) { - if (Printed.insert(D).second) - printForwardDecl(D); - } - - void VisitTemplateArgs(ArrayRef Args) { - for (size_t I = 0, E = Args.size(); I < E; ++I) - Visit(Args[I]); - } - -public: - SYCLFwdDeclEmitter(raw_ostream &OS, const LangOptions &LO) - : OS(OS), Policy(LO) { - Policy.adjustForCPlusPlusFwdDecl(); - Policy.SuppressTypedefs = true; - Policy.SuppressUnwrittenScope = true; - Policy.PrintCanonicalTypes = true; - Policy.SkipCanonicalizationOfTemplateTypeParms = true; - Policy.SuppressFinalSpecifier = true; - } - - void Visit(QualType T) { - if (T.isNull()) - return; - InnerTypeVisitor::Visit(T.getTypePtr()); - } - - void VisitReferenceType(const ReferenceType *RT) { - // Our forward declarations don't care about references, so we should just - // ignore the reference and continue on. - Visit(RT->getPointeeType()); - } - - void Visit(const TemplateArgument &TA) { - if (TA.isNull()) - return; - InnerTemplArgVisitor::Visit(TA); - } - - void VisitPointerType(const PointerType *T) { - // Peel off the pointer types. 
- QualType PT = T->getPointeeType(); - while (PT->isPointerType()) - PT = PT->getPointeeType(); - Visit(PT); - } - - void VisitTagType(const TagType *T) { - TagDecl *TD = T->getDecl(); - if (const auto *TSD = dyn_cast(TD)) { - // - first, recurse into template parameters and emit needed forward - // declarations - ArrayRef Args = TSD->getTemplateArgs().asArray(); - VisitTemplateArgs(Args); - // - second, emit forward declaration for the template class being - // specialized - ClassTemplateDecl *CTD = TSD->getSpecializedTemplate(); - assert(CTD && "template declaration must be available"); - - checkAndEmitForwardDecl(CTD); - return; - } - checkAndEmitForwardDecl(TD); - } - - void VisitTypeTemplateArgument(const TemplateArgument &TA) { - QualType T = TA.getAsType(); - Visit(T); - } - - void VisitIntegralTemplateArgument(const TemplateArgument &TA) { - QualType T = TA.getIntegralType(); - if (const EnumType *ET = T->getAs()) - VisitTagType(ET); - } - - void VisitTemplateTemplateArgument(const TemplateArgument &TA) { - // recursion is not required, since the maximum possible nesting level - // equals two for template argument - // - // for example: - // template class Bar; - // template