diff --git a/sycl/include/CL/__spirv/spirv_vars.hpp b/sycl/include/CL/__spirv/spirv_vars.hpp index 7271138fa9dd3..e25f0de9fee65 100644 --- a/sycl/include/CL/__spirv/spirv_vars.hpp +++ b/sycl/include/CL/__spirv/spirv_vars.hpp @@ -15,7 +15,7 @@ #define __SPIRV_VAR_QUALIFIERS extern "C" const -#ifdef __SYCL_NVPTX__ +#if defined(__SYCL_NVPTX__) || defined(__SYCL_EXPLICIT_SIMD__) SYCL_EXTERNAL size_t __spirv_GlobalInvocationId_x(); SYCL_EXTERNAL size_t __spirv_GlobalInvocationId_y(); diff --git a/sycl/include/CL/sycl/accessor.hpp b/sycl/include/CL/sycl/accessor.hpp index 125944e6c10eb..7b1560e51dbaa 100644 --- a/sycl/include/CL/sycl/accessor.hpp +++ b/sycl/include/CL/sycl/accessor.hpp @@ -195,6 +195,17 @@ /// accessor_common contains several helpers common for both accessor(1) and /// accessor(3) +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace intel { +namespace gpu { +// Forward declare a "back-door" access class to support ESIMD. +class AccessorPrivateProxy; +} // namespace gpu +} // namespace intel +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) + __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { @@ -419,6 +430,13 @@ class image_accessor #endif +private: + friend class sycl::intel::gpu::AccessorPrivateProxy; + +#if defined(__SYCL_DEVICE_ONLY__) && defined(__SYCL_EXPLICIT_SIMD__) + const OCLImageTy getNativeImageObj() const { return MImageObj; } +#endif // __SYCL_DEVICE_ONLY__ && __SYCL_EXPLICIT_SIMD__ + public: using value_type = DataT; using reference = DataT &; @@ -805,8 +823,27 @@ class accessor : detail::AccessorImplDevice impl; - ConcreteASPtrType MData; +#ifdef __SYCL_EXPLICIT_SIMD__ + using OCLImage1dBufferTy = + typename detail::opencl_image1d_buffer_type::type; +#endif // __SYCL_EXPLICIT_SIMD__ + + union { + ConcreteASPtrType MData; +#ifdef __SYCL_EXPLICIT_SIMD__ + OCLImage1dBufferTy ImageBuffer; +#endif // __SYCL_EXPLICIT_SIMD__ + }; + +#ifdef __SYCL_EXPLICIT_SIMD__ + // TODO In ESIMD accessors usage is limited for now - access range, mem + // range and offset are not supported. The cl_mem object allocated for + // a global accessor is always wrapped into a 1d image buffer to enable + // surface index-based addressing. + void __init(OCLImage1dBufferTy ImgBuf) { ImageBuffer = ImgBuf; } + const OCLImage1dBufferTy getNativeImageObj() const { return ImageBuffer; } +#else void __init(ConcreteASPtrType Ptr, range AccessRange, range MemRange, id Offset) { MData = Ptr; @@ -820,7 +857,7 @@ class accessor : if (1 == AdjustedDim) MData += Offset[0]; } - +#endif // __SYCL_EXPLICIT_SIMD__ ConcreteASPtrType getQualifiedPtr() const { return MData; } public: @@ -843,6 +880,9 @@ class accessor : #endif // __SYCL_DEVICE_ONLY__ +private: + friend class sycl::intel::gpu::AccessorPrivateProxy; + public: using value_type = DataT; using reference = DataT &; diff --git a/sycl/include/CL/sycl/detail/accessor_impl.hpp b/sycl/include/CL/sycl/detail/accessor_impl.hpp index 5ef72ce5f57b9..76676014975c2 100644 --- a/sycl/include/CL/sycl/detail/accessor_impl.hpp +++ b/sycl/include/CL/sycl/detail/accessor_impl.hpp @@ -15,6 +15,17 @@ #include #include +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace intel { +namespace gpu { +// Forward declare a "back-door" access class to support ESIMD. +class AccessorPrivateProxy; +} // namespace gpu +} // namespace intel +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) + __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { @@ -59,16 +70,29 @@ template class LocalAccessorBaseDevice { } }; +// TODO ESIMD Currently all accessors are treated as ESIMD under corresponding +// compiler option enabling the macro below. Eventually ESIMD kernels and usual +// kernels must co-exist and there must be a mechanism for distinguishing usual +// and ESIMD accessors. +#ifndef __SYCL_EXPLICIT_SIMD__ +constexpr bool IsESIMDAccInit = false; +#else +constexpr bool IsESIMDAccInit = true; +#endif // __SYCL_EXPLICIT_SIMD__ + class __SYCL_EXPORT AccessorImplHost { public: AccessorImplHost(id<3> Offset, range<3> AccessRange, range<3> MemoryRange, access::mode AccessMode, detail::SYCLMemObjI *SYCLMemObject, int Dims, int ElemSize, int OffsetInBytes = 0, - bool IsSubBuffer = false) + bool IsSubBuffer = false, bool IsESIMDAcc = IsESIMDAccInit) : MOffset(Offset), MAccessRange(AccessRange), MMemoryRange(MemoryRange), MAccessMode(AccessMode), MSYCLMemObj(SYCLMemObject), MDims(Dims), MElemSize(ElemSize), MOffsetInBytes(OffsetInBytes), - MIsSubBuffer(IsSubBuffer) {} + MIsSubBuffer(IsSubBuffer) { + MIsESIMDAcc = + IsESIMDAcc && (SYCLMemObject->getType() == SYCLMemObjI::BUFFER); + } ~AccessorImplHost(); @@ -77,7 +101,7 @@ class __SYCL_EXPORT AccessorImplHost { MMemoryRange(Other.MMemoryRange), MAccessMode(Other.MAccessMode), MSYCLMemObj(Other.MSYCLMemObj), MDims(Other.MDims), MElemSize(Other.MElemSize), MOffsetInBytes(Other.MOffsetInBytes), - MIsSubBuffer(Other.MIsSubBuffer) {} + MIsSubBuffer(Other.MIsSubBuffer), MIsESIMDAcc(Other.MIsESIMDAcc) {} // The resize method provides a way to change the size of the // allocated memory and corresponding properties for the accessor. @@ -109,6 +133,9 @@ class __SYCL_EXPORT AccessorImplHost { Command *MBlockedCmd = nullptr; bool PerWI = false; + + // Whether this accessor is ESIMD accessor with special memory allocation. + bool MIsESIMDAcc; }; using AccessorImplPtr = shared_ptr_class; @@ -121,7 +148,8 @@ class AccessorBaseHost { bool IsSubBuffer = false) { impl = shared_ptr_class(new AccessorImplHost( Offset, AccessRange, MemoryRange, AccessMode, SYCLMemObject, Dims, - ElemSize, OffsetInBytes, IsSubBuffer)); + ElemSize, OffsetInBytes, IsSubBuffer, + IsESIMDAccInit && (SYCLMemObject->getType() == SYCLMemObjI::BUFFER))); } protected: @@ -140,6 +168,9 @@ class AccessorBaseHost { friend decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject); AccessorImplPtr impl; + +private: + friend class sycl::intel::gpu::AccessorPrivateProxy; }; class __SYCL_EXPORT LocalAccessorImplHost { diff --git a/sycl/include/CL/sycl/detail/image_ocl_types.hpp b/sycl/include/CL/sycl/detail/image_ocl_types.hpp index 2dd20bf60e412..8d5822495dda5 100644 --- a/sycl/include/CL/sycl/detail/image_ocl_types.hpp +++ b/sycl/include/CL/sycl/detail/image_ocl_types.hpp @@ -180,6 +180,30 @@ inline int getSPIRVElementSize(int ImageChannelType, int ImageChannelOrder) { } } +#ifdef __SYCL_EXPLICIT_SIMD__ +template struct opencl_image1d_buffer_type; + +// OpenCL types used only when compiling DPCPP ESIMD kernels +#define IMAGE_BUFFER_TY_DEFINE(AccessMode, AMSuffix) \ + template <> struct opencl_image1d_buffer_type { \ + using type = __ocl_image1d_buffer_##AMSuffix##_t; \ + } + +IMAGE_BUFFER_TY_DEFINE(read, ro); +IMAGE_BUFFER_TY_DEFINE(write, wo); +IMAGE_BUFFER_TY_DEFINE(discard_write, wo); +IMAGE_BUFFER_TY_DEFINE(read_write, rw); + +template <> struct opencl_image1d_buffer_type { + // static_assert(false && "atomic access not supported for image1d + // buffers"); + // TODO this should be disabled; currently there is instantiation of this + // class happenning even if atomic access not used - using dummy type + // definition for now. + using type = unsigned int; +}; +#endif // __SYCL_EXPLICIT_SIMD__ + template struct opencl_image_type; diff --git a/sycl/include/CL/sycl/detail/memory_manager.hpp b/sycl/include/CL/sycl/detail/memory_manager.hpp index 366edf4093e8c..711d9a0ac0347 100644 --- a/sycl/include/CL/sycl/detail/memory_manager.hpp +++ b/sycl/include/CL/sycl/detail/memory_manager.hpp @@ -47,6 +47,14 @@ class __SYCL_EXPORT MemoryManager { std::vector DepEvents, RT::PiEvent &OutEvent); + // Allocates memory buffer wrapped into an image. MemObj must be a buffer, + // not an image. Used in ESIMD extension to enable surface index-based access. + static void *wrapIntoImageBuffer(ContextImplPtr TargetContext, void *MemBuf, + SYCLMemObjI *MemObj); + + // Releases the image buffer created by wrapIntoImageBuffer. + static void releaseImageBuffer(ContextImplPtr TargetContext, void *ImageBuf); + // The following method creates OpenCL sub buffer for specified // offset, range, and memory object. static void *allocateMemSubBuffer(ContextImplPtr TargetContext, diff --git a/sycl/include/CL/sycl/detail/stl_type_traits.hpp b/sycl/include/CL/sycl/detail/stl_type_traits.hpp index 163248a989e68..83da9d8cf60c0 100644 --- a/sycl/include/CL/sycl/detail/stl_type_traits.hpp +++ b/sycl/include/CL/sycl/detail/stl_type_traits.hpp @@ -8,6 +8,7 @@ #pragma once +#include #include #include #include @@ -35,6 +36,11 @@ using remove_reference_t = typename std::remove_reference::type; template using add_pointer_t = typename std::add_pointer::type; +template using remove_cv_t = typename std::remove_cv::type; + +template +using remove_reference_t = typename std::remove_reference::type; + // C++17 template using bool_constant = std::integral_constant; diff --git a/sycl/source/detail/accessor_impl.cpp b/sycl/source/detail/accessor_impl.cpp index 07eb74e4ef36f..4b3b2a144579a 100644 --- a/sycl/source/detail/accessor_impl.cpp +++ b/sycl/source/detail/accessor_impl.cpp @@ -40,4 +40,3 @@ void addHostAccessorAndWait(Requirement *Req) { } } } - diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index 31948be77f8f7..30aaec2e2e1cd 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -45,6 +45,12 @@ void MemoryManager::release(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, MemObj->releaseMem(TargetContext, MemAllocation); } +void MemoryManager::releaseImageBuffer(ContextImplPtr TargetContext, + void *ImageBuf) { + auto PIObj = reinterpret_cast(ImageBuf); + TargetContext->getPlugin().call(PIObj); +} + void MemoryManager::releaseMemObj(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *MemAllocation, void *UserPtr) { @@ -75,6 +81,30 @@ void *MemoryManager::allocate(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, OutEvent); } +// Creates an image1d buffer wrapper object around given memory object. +void *MemoryManager::wrapIntoImageBuffer(ContextImplPtr TargetContext, + void *MemBuf, SYCLMemObjI *MemObj) { + // Image format: 1 channel per pixel, each pixel 8 bit, Size pixels occupies + // Size bytes. + pi_image_format Format = {PI_IMAGE_CHANNEL_ORDER_R, + PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8}; + + // Image descriptor - request wrapper image1d creation. + pi_image_desc Desc = {}; + Desc.image_type = PI_MEM_TYPE_IMAGE1D_BUFFER; + Desc.image_width = MemObj->getSize(); + Desc.buffer = reinterpret_cast(MemBuf); + + // Create the image object. + const detail::plugin &Plugin = TargetContext->getPlugin(); + pi_mem Res = nullptr; + pi_mem_flags Flags = 0; + // Do not ref count the context handle, as it is not captured by the call. + Plugin.call(TargetContext->getHandleRef(), Flags, + &Format, &Desc, nullptr, &Res); + return Res; +} + void *MemoryManager::allocateHostMemory(SYCLMemObjI *MemObj, void *UserPtr, bool HostPtrReadOnly, size_t Size) { // Can return user pointer directly if it points to writable memory. diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 2867b70e57d8a..7f4a1e6d6e6a6 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -774,6 +774,14 @@ ProgramManager::build(ProgramPtr Program, const ContextImplPtr Context, LinkDeviceLibs = false; } + // TODO: this is a temporary workaround for GPU tests for ESIMD compiler. + // We do not link with other device libraries, because it may fail + // due to unrecognized SPIRV format of those libraries. + if (std::string(LinkOpts).find(std::string("-cmc")) != std::string::npos || + std::string(LinkOpts).find(std::string("-vc-codegen")) != + std::string::npos) + LinkDeviceLibs = false; + std::vector LinkPrograms; if (LinkDeviceLibs) { LinkPrograms = getDeviceLibPrograms(Context, Devices, CachedLibPrograms); diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index e836336b98449..823f17704f607 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -751,6 +751,14 @@ cl_int AllocaCommand::enqueueImp() { detail::getSyclObjImpl(MQueue->get_context()), getSYCLMemObj(), MInitFromUserData, HostPtr, std::move(EventImpls), Event); + // if this is ESIMD accessor, wrap the allocated device memory buffer into + // an image buffer object. + // TODO Address copying SYCL/ESIMD memory between contexts. + if (getRequirement()->MIsESIMDAcc) + ESIMDExt.MWrapperImage = MemoryManager::wrapIntoImageBuffer( + detail::getSyclObjImpl(MQueue->get_context()), MMemAllocation, + getSYCLMemObj()); + return CL_SUCCESS; } @@ -937,12 +945,16 @@ cl_int ReleaseCommand::enqueueImp() { RT::PiEvent &Event = MEvent->getHandleRef(); if (SkipRelease) Command::waitForEvents(MQueue, EventImpls, Event); - else + else { MemoryManager::release(detail::getSyclObjImpl(MQueue->get_context()), MAllocaCmd->getSYCLMemObj(), MAllocaCmd->getMemAllocation(), std::move(EventImpls), Event); - + // Release the wrapper object if present. + if (void *WrapperImage = MAllocaCmd->ESIMDExt.MWrapperImage) + MemoryManager::releaseImageBuffer( + detail::getSyclObjImpl(MQueue->get_context()), WrapperImage); + } return CL_SUCCESS; } @@ -1638,7 +1650,9 @@ pi_result ExecCGCommand::SetKernelParamsAndLaunch( case kernel_param_kind_t::kind_accessor: { Requirement *Req = (Requirement *)(Arg.MPtr); AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); - RT::PiMem MemArg = (RT::PiMem)AllocaCmd->getMemAllocation(); + RT::PiMem MemArg = Req->MIsESIMDAcc + ? (RT::PiMem)AllocaCmd->ESIMDExt.MWrapperImage + : (RT::PiMem)AllocaCmd->getMemAllocation(); if (Plugin.getBackend() == backend::opencl) { Plugin.call(Kernel, Arg.MIndex, sizeof(RT::PiMem), &MemArg); diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 76542bf4d1fa6..816616f6a5e50 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -320,6 +320,13 @@ class AllocaCommandBase : public Command { void *MMemAllocation = nullptr; + // ESIMD-extension-specific fields. + struct { + // If this alloca corresponds to an ESIMD accessor, then this field holds + // an image buffer wrapping the memory allocation above. + void *MWrapperImage = nullptr; + } ESIMDExt; + /// Alloca command linked with current command. /// Device and host alloca commands can be linked, so they may share the same /// memory. Only one allocation from a pair can be accessed at a time. Alloca diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 4931c4a41731a..730c7c8d4bcc8 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -559,7 +559,9 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( const Requirement FullReq(/*Offset*/ {0, 0, 0}, Req->MMemoryRange, Req->MMemoryRange, access::mode::read_write, - Req->MSYCLMemObj, Req->MDims, Req->MElemSize); + Req->MSYCLMemObj, Req->MDims, Req->MElemSize, + 0 /*ReMOffsetInBytes*/, false /*MIsSubBuffer*/, + Req->MIsESIMDAcc); // Can reuse user data for the first allocation const bool InitFromUserData = Record->MAllocaCommands.empty(); diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 7ad8e2bf88b76..6fc47bcf6b5de 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -141,7 +141,11 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, AccImpl->resize(MNDRDesc.GlobalSize.size()); } MArgs.emplace_back(Kind, AccImpl, Size, Index + IndexShift); - if (!IsKernelCreatedFromSource) { + + // TODO ESIMD currently does not suport offset, memory and access ranges - + // accessor::init for ESIMD-mode accessor has a single field, translated + // to a single kernel argument set above. + if (!AccImpl->MIsESIMDAcc && !IsKernelCreatedFromSource) { // Dimensionality of the buffer is 1 when dimensionality of the // accessor is 0. const size_t SizeAccField = diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index a872ac31392ac..1425b82d13d52 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3736,6 +3736,8 @@ _ZN2cl4sycl6detail13MemoryManager7releaseESt10shared_ptrINS1_12context_implEEPNS _ZN2cl4sycl6detail13MemoryManager8allocateESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEbPvSt6vectorIS3_INS1_10event_implEESaISB_EERP9_pi_event _ZN2cl4sycl6detail13MemoryManager8copy_usmEPKvSt10shared_ptrINS1_10queue_implEEmPvSt6vectorIP9_pi_eventSaISB_EERSB_ _ZN2cl4sycl6detail13MemoryManager8fill_usmEPvSt10shared_ptrINS1_10queue_implEEmiSt6vectorIP9_pi_eventSaIS9_EERS9_ +_ZN2cl4sycl6detail13MemoryManager18releaseImageBufferESt10shared_ptrINS1_12context_implEEPv +_ZN2cl4sycl6detail13MemoryManager19wrapIntoImageBufferESt10shared_ptrINS1_12context_implEEPvPNS1_11SYCLMemObjIE _ZN2cl4sycl6detail14getBorderColorENS0_19image_channel_orderE _ZN2cl4sycl6detail14host_half_impl4halfC1ERKf _ZN2cl4sycl6detail14host_half_impl4halfC2ERKf diff --git a/sycl/test/check_device_code/kernel_arguments_as.cpp b/sycl/test/check_device_code/kernel_arguments_as.cpp index 0c4c4a1dd2b49..fc1cfca2f549c 100644 --- a/sycl/test/check_device_code/kernel_arguments_as.cpp +++ b/sycl/test/check_device_code/kernel_arguments_as.cpp @@ -4,7 +4,8 @@ // Check the address space of the pointer in accessor class. // // CHECK: %struct{{.*}}AccWrapper = type { %"class{{.*}}cl::sycl::accessor" } -// CHECK-NEXT: %"class{{.*}}cl::sycl::accessor" = type { %"class{{.*}}AccessorImplDevice", i32 addrspace(1)* } +// CHECK: %"class{{.*}}cl::sycl::accessor" = type { %"class{{.*}}AccessorImplDevice", %[[UNION:.*]] } +// CHECK: %[[UNION]] = type { i32 addrspace(1)* } // CHECK: %struct{{.*}}AccWrapper = type { %"class{{.*}}cl::sycl::accessor" } // CHECK-NEXT: %"class{{.*}}cl::sycl::accessor" = type { %"class{{.*}}LocalAccessorBaseDevice", i32 addrspace(3)* } //