From 0117f88c5d1167efdc8c90aad036dece492f8e72 Mon Sep 17 00:00:00 2001 From: Mariya Podchishchaeva Date: Wed, 5 Aug 2020 13:33:28 +0300 Subject: [PATCH] [SYCL] Align stream handling with other classes This change simplifies FE handling of stream class and makes it identical to handling of accessor and sampler. Stream class is not handled as wrapper struct for several accessors and initialized within __init method. --- clang/include/clang/Sema/Sema.h | 1 + clang/lib/Sema/SemaSYCL.cpp | 78 ++++----------- clang/test/CodeGenSYCL/Inputs/sycl.hpp | 15 ++- clang/test/CodeGenSYCL/stream.cpp | 12 ++- sycl/include/CL/sycl/accessor.hpp | 2 + sycl/include/CL/sycl/detail/kernel_desc.hpp | 1 + sycl/include/CL/sycl/stream.hpp | 44 ++++++++- sycl/source/detail/scheduler/commands.cpp | 2 + sycl/source/handler.cpp | 104 ++++++++++++++------ 9 files changed, 167 insertions(+), 92 deletions(-) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 40ad35cc84576..acac611ae43bf 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -314,6 +314,7 @@ class SYCLIntegrationHeader { kind_accessor = kind_first, kind_std_layout, kind_sampler, + kind_stream, kind_pointer, kind_last = kind_pointer }; diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 631f81fe4a9ab..67ae17e7b5a5a 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -880,26 +880,6 @@ class KernelObjVisitor { VisitRecordFields(Owner, handlers...); } - // FIXME: Can this be refactored/handled some other way? - template - void VisitStreamRecord(const CXXRecordDecl *Owner, ParentTy &Parent, - CXXRecordDecl *Wrapper, Handlers &... handlers) { - (void)std::initializer_list{ - (handlers.enterStruct(Owner, Parent), 0)...}; - for (const auto &Field : Wrapper->fields()) { - QualType FieldTy = Field->getType(); - (void)std::initializer_list{ - (handlers.enterField(Wrapper, Field), 0)...}; - // Required to initialize accessors inside streams. - if (Util::isSyclAccessorType(FieldTy)) - KF_FOR_EACH(handleSyclAccessorType, Field, FieldTy); - (void)std::initializer_list{ - (handlers.leaveField(Wrapper, Field), 0)...}; - } - (void)std::initializer_list{ - (handlers.leaveStruct(Owner, Parent), 0)...}; - } - template void VisitRecordBases(const CXXRecordDecl *KernelFunctor, Handlers &... handlers) { @@ -924,12 +904,9 @@ class KernelObjVisitor { KF_FOR_EACH(handleSyclHalfType, Field, FieldTy); else if (Util::isSyclSpecConstantType(FieldTy)) KF_FOR_EACH(handleSyclSpecConstantType, Field, FieldTy); - else if (Util::isSyclStreamType(FieldTy)) { - CXXRecordDecl *RD = FieldTy->getAsCXXRecordDecl(); - // Handle accessors in stream class. - VisitStreamRecord(Owner, Field, RD, handlers...); + else if (Util::isSyclStreamType(FieldTy)) KF_FOR_EACH(handleSyclStreamType, Field, FieldTy); - } else if (FieldTy->isStructureOrClassType()) { + else if (FieldTy->isStructureOrClassType()) { if (KF_FOR_EACH(handleStructType, Field, FieldTy)) { CXXRecordDecl *RD = FieldTy->getAsCXXRecordDecl(); VisitRecord(Owner, Field, RD, handlers...); @@ -1297,8 +1274,7 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler { } bool handleSyclStreamType(FieldDecl *FD, QualType FieldTy) final { - addParam(FD, FieldTy); - return true; + return handleSpecialType(FD, FieldTy); } bool handleSyclStreamType(const CXXBaseSpecifier &, QualType FieldTy) final { @@ -1515,6 +1491,13 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { createSpecialMethodCall(MemberExprBases.back(), InitMethod, FD); BodyStmts.push_back(InitCall); } + CXXMethodDecl *FinalizeMethod = + getMethodByName(RecordDecl, FinalizeMethodName); + if (FinalizeMethod) { + CXXMemberCallExpr *FinalizeCall = + createSpecialMethodCall(MemberExprBases.back(), FinalizeMethod, FD); + FinalizeStmts.push_back(FinalizeCall); + } return true; } @@ -1537,6 +1520,13 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { createSpecialMethodCall(MemberExprBases.back(), InitMethod, nullptr); BodyStmts.push_back(InitCall); } + CXXMethodDecl *FinalizeMethod = + getMethodByName(RecordDecl, FinalizeMethodName); + if (FinalizeMethod) { + CXXMemberCallExpr *FinalizeCall = createSpecialMethodCall( + MemberExprBases.back(), FinalizeMethod, nullptr); + FinalizeStmts.push_back(FinalizeCall); + } return true; } @@ -1583,23 +1573,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { } bool handleSyclStreamType(FieldDecl *FD, QualType Ty) final { - const auto *StreamDecl = Ty->getAsCXXRecordDecl(); - createExprForStructOrScalar(FD); - size_t NumBases = MemberExprBases.size(); - CXXMethodDecl *InitMethod = getMethodByName(StreamDecl, InitMethodName); - if (InitMethod) { - CXXMemberCallExpr *InitCall = - createSpecialMethodCall(MemberExprBases.back(), InitMethod, FD); - BodyStmts.push_back(InitCall); - } - CXXMethodDecl *FinalizeMethod = - getMethodByName(StreamDecl, FinalizeMethodName); - if (FinalizeMethod) { - CXXMemberCallExpr *FinalizeCall = createSpecialMethodCall( - MemberExprBases[NumBases - 2], FinalizeMethod, FD); - FinalizeStmts.push_back(FinalizeCall); - } - return true; + return handleSpecialType(FD, Ty); } bool handleSyclStreamType(const CXXBaseSpecifier &BS, QualType Ty) final { @@ -1666,18 +1640,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { const CXXRecordDecl *RD = FD->getType()->getBaseElementTypeUnsafe()->getAsCXXRecordDecl(); - // Initializers for accessors inside stream not added. - if (!Util::isSyclStreamType(FD->getType())) - addStructInit(RD); - // Pop out unused initializers created in handleSyclAccesorType - // for accessors inside stream class. - else { - for (const auto &Field : RD->fields()) { - QualType FieldTy = Field->getType(); - if (Util::isSyclAccessorType(FieldTy)) - InitExprs.pop_back(); - } - } + addStructInit(RD); return true; } @@ -1831,7 +1794,7 @@ class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler { } bool handleSyclStreamType(FieldDecl *FD, QualType FieldTy) final { - addParam(FD, FieldTy, SYCLIntegrationHeader::kind_std_layout); + addParam(FD, FieldTy, SYCLIntegrationHeader::kind_stream); return true; } @@ -2211,6 +2174,7 @@ static const char *paramKind2Str(KernelParamKind K) { CASE(accessor); CASE(std_layout); CASE(sampler); + CASE(stream); CASE(pointer); default: return ""; diff --git a/clang/test/CodeGenSYCL/Inputs/sycl.hpp b/clang/test/CodeGenSYCL/Inputs/sycl.hpp index 3184c58edcbfc..06145b8914fc2 100644 --- a/clang/test/CodeGenSYCL/Inputs/sycl.hpp +++ b/clang/test/CodeGenSYCL/Inputs/sycl.hpp @@ -141,6 +141,7 @@ class accessor { private: void __init(__attribute__((opencl_global)) dataT *Ptr, range AccessRange, range MemRange, id Offset) {} + friend class stream; }; template @@ -314,10 +315,22 @@ class stream { public: stream(unsigned long BufferSize, unsigned long MaxStatementSize, handler &CGH) {} +#ifdef __SYCL_DEVICE_ONLY__ + // Default constructor for objects later initialized with __init member. + stream() = default; +#endif - void __init() {} + void __init(__attribute((opencl_global)) char *Ptr, range<1> AccessRange, + range<1> MemRange, id<1> Offset, int _FlushBufferSize) { + Acc.__init(Ptr, AccessRange, MemRange, Offset); + FlushBufferSize = _FlushBufferSize; + } void __finalize() {} + +private: + cl::sycl::accessor Acc; + int FlushBufferSize; }; template diff --git a/clang/test/CodeGenSYCL/stream.cpp b/clang/test/CodeGenSYCL/stream.cpp index 8620f83c5fabf..3af57a2d6f5d5 100644 --- a/clang/test/CodeGenSYCL/stream.cpp +++ b/clang/test/CodeGenSYCL/stream.cpp @@ -1,8 +1,16 @@ // RUN: %clang_cc1 -fsycl -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -I %S/Inputs -disable-llvm-passes -emit-llvm %s -o %t.ll // RUN: FileCheck < %t.ll --enable-var-scope %s // -// CHECK: define spir_kernel void @"{{.*}}StreamTester"(%"{{.*}}cl::sycl::stream"* byval(%"{{.*}}cl::sycl::stream") {{.*}}){{.*}} -// CHECK: call spir_func void @{{.*}}__init{{.*}}(%{{.*}}cl::sycl::stream{{.*}} addrspace(4)* %{{[0-9]+}}) +// CHECK: %[[RANGE_TYPE:"struct.*cl::sycl::range"]] +// CHECK: %[[ID_TYPE:"struct.*cl::sycl::id"]] +// CHECK: define spir_kernel void @{{.*}}StreamTester +// CHECK-SAME: i8 addrspace(1)* [[ACC_DATA:%[a-zA-Z0-9_]+]], +// CHECK-SAME: %[[RANGE_TYPE]]* byval(%[[RANGE_TYPE]]) align 4 [[ACC_RANGE1:%[a-zA-Z0-9_]+]], +// CHECK-SAME: %[[RANGE_TYPE]]* byval(%[[RANGE_TYPE]]) align 4 [[ACC_RANGE2:%[a-zA-Z0-9_]+]], +// CHECK-SAME: %[[ID_TYPE]]* byval(%[[ID_TYPE]]) align 4 [[ACC_ID:%[a-zA-Z0-9_]+]], +// CHECK-SAME: i32 [[ARG_INT:%[a-zA-Z0-9_]+]]) + +// CHECK: call spir_func void @{{.*}}__init{{.*}}(%{{.*}}cl::sycl::stream{{.*}} addrspace(4)* %{{[0-9]+}}, i8 addrspace(1)* %5, %[[RANGE_TYPE]]* byval(%[[RANGE_TYPE]]) {{.*}} %{{.*}} // CHECK: call spir_func void @{{.*}}__finalize{{.*}}(%{{.*}}cl::sycl::stream{{.*}} addrspace(4)* %{{[0-9]+}}) // diff --git a/sycl/include/CL/sycl/accessor.hpp b/sycl/include/CL/sycl/accessor.hpp index 8631cee6ab640..0f4f4a1b2b7e7 100755 --- a/sycl/include/CL/sycl/accessor.hpp +++ b/sycl/include/CL/sycl/accessor.hpp @@ -199,6 +199,7 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { +class stream; namespace intel { namespace gpu { // Forward declare a "back-door" access class to support ESIMD. @@ -886,6 +887,7 @@ class accessor : private: friend class sycl::intel::gpu::AccessorPrivateProxy; + friend class sycl::stream; public: using value_type = DataT; diff --git a/sycl/include/CL/sycl/detail/kernel_desc.hpp b/sycl/include/CL/sycl/detail/kernel_desc.hpp index 120fb9dc4e96c..62996a406a609 100644 --- a/sycl/include/CL/sycl/detail/kernel_desc.hpp +++ b/sycl/include/CL/sycl/detail/kernel_desc.hpp @@ -27,6 +27,7 @@ enum class kernel_param_kind_t { kind_accessor, kind_std_layout, // standard layout object parameters kind_sampler, + kind_stream, kind_pointer }; diff --git a/sycl/include/CL/sycl/stream.hpp b/sycl/include/CL/sycl/stream.hpp index 20cdef064b2ef..bca427a001e93 100644 --- a/sycl/include/CL/sycl/stream.hpp +++ b/sycl/include/CL/sycl/stream.hpp @@ -63,11 +63,24 @@ using GlobalBufAccessorT = accessor; +constexpr static access::address_space GlobalBufAS = + TargetToAS::AS; +using GlobalBufPtrType = + typename detail::PtrValueType::type *; +constexpr static int GlobalBufDim = 1; + using GlobalOffsetAccessorT = accessor; +constexpr static access::address_space GlobalOffsetAS = + TargetToAS::AS; +using GlobalOffsetPtrType = + typename detail::PtrValueType::type *; +constexpr static int GlobalOffsetDim = 1; + + inline void write(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, unsigned &Offset, const char *Str, unsigned Len, unsigned Padding = 0) { @@ -697,6 +710,12 @@ inline __width_manipulator__ setw(int Width) { /// \ingroup sycl_api class __SYCL_EXPORT stream { public: + +#ifdef __SYCL_DEVICE_ONLY__ + // Default constructor for objects later initialized with __init member. + stream() = default; +#endif + stream(size_t BufferSize, size_t MaxStatementSize, handler &CGH); size_t get_size() const; @@ -810,7 +829,28 @@ class __SYCL_EXPORT stream { } #ifdef __SYCL_DEVICE_ONLY__ - void __init() { + void __init(detail::GlobalBufPtrType GlobalBufPtr, + range GlobalBufAccRange, + range GlobalBufMemRange, + id GlobalBufId, + detail::GlobalOffsetPtrType GlobalOffsetPtr, + range GlobalOffsetAccRange, + range GlobalOffsetMemRange, + id GlobalOffsetId, + detail::GlobalBufPtrType GlobalFlushPtr, + range GlobalFlushAccRange, + range GlobalFlushMemRange, + id GlobalFlushId, + size_t _FlushBufferSize) { +#ifndef __SYCL_EXPLICIT_SIMD__ + GlobalBuf.__init(GlobalBufPtr, GlobalBufAccRange, GlobalBufMemRange, + GlobalBufId); + GlobalOffset.__init(GlobalOffsetPtr, GlobalOffsetAccRange, + GlobalOffsetMemRange, GlobalOffsetId); + GlobalFlushBuf.__init(GlobalFlushPtr, GlobalFlushAccRange, + GlobalFlushMemRange, GlobalFlushId); +#endif + FlushBufferSize = _FlushBufferSize; // Calculate work item's global id, this should be done once, that // is why this is done in _init method, call to __init method is generated // by frontend. As a result each work item will write to its own section @@ -834,6 +874,8 @@ class __SYCL_EXPORT stream { } #endif + friend class handler; + friend const stream &operator<<(const stream &, const char); friend const stream &operator<<(const stream &, const char *); template diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index f21d39d8efc3d..e0798c89de34a 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1652,6 +1652,8 @@ pi_result ExecCGCommand::SetKernelParamsAndLaunch( const detail::plugin &Plugin = MQueue->getPlugin(); for (ArgDesc &Arg : ExecKernel->MArgs) { switch (Arg.MType) { + case kernel_param_kind_t::kind_stream: + break; case kernel_param_kind_t::kind_accessor: { Requirement *Req = (Requirement *)(Arg.MPtr); AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 5a54760e813e7..74fc220f050be 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -130,6 +131,41 @@ void handler::associateWithHandler(detail::AccessorBaseHost *AccBase, /*index*/ 0); } +static void addArgsForGlobalAccessor(detail::Requirement *AccImpl, + const size_t Index, size_t &IndexShift, + const int Size, + bool IsKernelCreatedFromSource, + size_t GlobalSize, + vector_class &Args) { + using detail::kernel_param_kind_t; + if (AccImpl->PerWI) + AccImpl->resize(GlobalSize); + + Args.emplace_back(kernel_param_kind_t::kind_accessor, AccImpl, Size, + Index + IndexShift); + + // TODO ESIMD currently does not suport offset, memory and access ranges - + // accessor::init for ESIMD-mode accessor has a single field, translated + // to a single kernel argument set above. + if (!AccImpl->MIsESIMDAcc && !IsKernelCreatedFromSource) { + // Dimensionality of the buffer is 1 when dimensionality of the + // accessor is 0. + const size_t SizeAccField = + sizeof(size_t) * (AccImpl->MDims == 0 ? 1 : AccImpl->MDims); + ++IndexShift; + Args.emplace_back(kernel_param_kind_t::kind_std_layout, + &AccImpl->MAccessRange[0], SizeAccField, + Index + IndexShift); + ++IndexShift; + Args.emplace_back(kernel_param_kind_t::kind_std_layout, + &AccImpl->MMemoryRange[0], SizeAccField, + Index + IndexShift); + ++IndexShift; + Args.emplace_back(kernel_param_kind_t::kind_std_layout, + &AccImpl->MOffset[0], SizeAccField, Index + IndexShift); + } +} + void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, const int Size, const size_t Index, size_t &IndexShift, bool IsKernelCreatedFromSource) { @@ -141,6 +177,40 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, MArgs.emplace_back(Kind, Ptr, Size, Index + IndexShift); break; } + case kernel_param_kind_t::kind_stream: { + // Stream contains several accessors inside. + stream *S = static_cast(Ptr); + + detail::AccessorBaseHost *GBufBase = + (detail::AccessorBaseHost *)&S->GlobalBuf; + detail::AccessorImplPtr GBufImpl = detail::getSyclObjImpl(*GBufBase); + detail::Requirement *GBufReq = GBufImpl.get(); + addArgsForGlobalAccessor(GBufReq, Index, IndexShift, Size, + IsKernelCreatedFromSource, + MNDRDesc.GlobalSize.size(), MArgs); + ++IndexShift; + detail::AccessorBaseHost *GOffsetBase = + (detail::AccessorBaseHost *)&S->GlobalOffset; + detail::AccessorImplPtr GOfssetImpl = detail::getSyclObjImpl(*GOffsetBase); + detail::Requirement *GOffsetReq = GOfssetImpl.get(); + addArgsForGlobalAccessor(GOffsetReq, Index, IndexShift, Size, + IsKernelCreatedFromSource, + MNDRDesc.GlobalSize.size(), MArgs); + ++IndexShift; + detail::AccessorBaseHost *GFlushBase = + (detail::AccessorBaseHost *)&S->GlobalFlushBuf; + detail::AccessorImplPtr GFlushImpl = detail::getSyclObjImpl(*GFlushBase); + detail::Requirement *GFlushReq = GFlushImpl.get(); + addArgsForGlobalAccessor(GFlushReq, Index, IndexShift, Size, + IsKernelCreatedFromSource, + MNDRDesc.GlobalSize.size(), MArgs); + ++IndexShift; + MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, + &S->FlushBufferSize, sizeof(S->FlushBufferSize), + Index + IndexShift); + + break; + } case kernel_param_kind_t::kind_accessor: { // For args kind of accessor Size is information about accessor. // The first 11 bits of Size encodes the accessor target. @@ -149,37 +219,9 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, case access::target::global_buffer: case access::target::constant_buffer: { detail::Requirement *AccImpl = static_cast(Ptr); - - // Stream implementation creates an accessor with initial size for - // work item. Number of work items is not available during - // stream construction, that is why size of the accessor is updated here - // using information about number of work items. - if (AccImpl->PerWI) { - AccImpl->resize(MNDRDesc.GlobalSize.size()); - } - MArgs.emplace_back(Kind, AccImpl, Size, Index + IndexShift); - - // TODO ESIMD currently does not suport offset, memory and access ranges - - // accessor::init for ESIMD-mode accessor has a single field, translated - // to a single kernel argument set above. - if (!AccImpl->MIsESIMDAcc && !IsKernelCreatedFromSource) { - // Dimensionality of the buffer is 1 when dimensionality of the - // accessor is 0. - const size_t SizeAccField = - sizeof(size_t) * (AccImpl->MDims == 0 ? 1 : AccImpl->MDims); - ++IndexShift; - MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, - &AccImpl->MAccessRange[0], SizeAccField, - Index + IndexShift); - ++IndexShift; - MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, - &AccImpl->MMemoryRange[0], SizeAccField, - Index + IndexShift); - ++IndexShift; - MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, - &AccImpl->MOffset[0], SizeAccField, - Index + IndexShift); - } + addArgsForGlobalAccessor(AccImpl, Index, IndexShift, Size, + IsKernelCreatedFromSource, + MNDRDesc.GlobalSize.size(), MArgs); break; } case access::target::local: {