diff --git a/sycl/include/sycl/__spirv/spirv_ops.hpp b/sycl/include/sycl/__spirv/spirv_ops.hpp index 88c503e02efe4..b001bc914b196 100644 --- a/sycl/include/sycl/__spirv/spirv_ops.hpp +++ b/sycl/include/sycl/__spirv/spirv_ops.hpp @@ -233,10 +233,12 @@ template extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ConvertHandleToSampledImageINTEL(HandleT); -#define __SYCL_OpGroupAsyncCopyGlobalToLocal __spirv_GroupAsyncCopy -#define __SYCL_OpGroupAsyncCopyLocalToGlobal __spirv_GroupAsyncCopy - // Atomic SPIR-V builtins +// TODO: drop these forward-declarations. +// As of now, compiler does not forward-declare long long overloads for +// these and as such we can't drop anything from here. But ideally, we should +// rely on the compiler to generate those - that would allow to drop +// spirv_ops.hpp include from more files. #define __SPIRV_ATOMIC_LOAD(AS, Type) \ extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicLoad(AS Type *P, int S, \ int O) noexcept; @@ -792,10 +794,6 @@ extern __DPCPP_SYCL_EXTERNAL __ocl_WPipeTy __spirv_CreatePipeFromPipeStorage_write( const ConstantPipeStorage *Storage) noexcept; -extern __DPCPP_SYCL_EXTERNAL void -__spirv_ocl_prefetch(const __attribute__((opencl_global)) char *Ptr, - size_t NumBytes) noexcept; - extern __DPCPP_SYCL_EXTERNAL float __spirv_ConvertBF16ToFINTEL(uint16_t) noexcept; extern __DPCPP_SYCL_EXTERNAL uint16_t @@ -967,43 +965,4 @@ extern __DPCPP_SYCL_EXTERNAL RetT __spirv_TaskSequenceGetINTEL( extern __DPCPP_SYCL_EXTERNAL void __spirv_TaskSequenceReleaseINTEL( __spv::__spirv_TaskSequenceINTEL *TaskSequence) noexcept; -#else // if !__SYCL_DEVICE_ONLY__ - -template -__SYCL_CONVERGENT__ extern __ocl_event_t -__SYCL_OpGroupAsyncCopyGlobalToLocal(int32_t, dataT *Dest, const dataT *Src, - size_t NumElements, size_t Stride, - __ocl_event_t) noexcept { - for (size_t i = 0; i < NumElements; i++) { - Dest[i] = Src[i * Stride]; - } - // A real instance of the class is not needed, return dummy pointer. - return nullptr; -} - -template -__SYCL_CONVERGENT__ extern __ocl_event_t -__SYCL_OpGroupAsyncCopyLocalToGlobal(int32_t, dataT *Dest, const dataT *Src, - size_t NumElements, size_t Stride, - __ocl_event_t) noexcept { - for (size_t i = 0; i < NumElements; i++) { - Dest[i * Stride] = Src[i]; - } - // A real instance of the class is not needed, return dummy pointer. - return nullptr; -} - -extern __SYCL_EXPORT void __spirv_ocl_prefetch(const char *Ptr, - size_t NumBytes) noexcept; - -__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void -__spirv_ControlBarrier(__spv::Scope Execution, __spv::Scope Memory, - uint32_t Semantics) noexcept; - -__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void -__spirv_MemoryBarrier(__spv::Scope Memory, uint32_t Semantics) noexcept; - -__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void -__spirv_GroupWaitEvents(__spv::Scope Execution, uint32_t NumEvents, - __ocl_event_t *WaitEvents) noexcept; #endif // !__SYCL_DEVICE_ONLY__ diff --git a/sycl/include/sycl/access/access.hpp b/sycl/include/sycl/access/access.hpp index 9c3f4e8c7fd67..1ef5de5931f4d 100644 --- a/sycl/include/sycl/access/access.hpp +++ b/sycl/include/sycl/access/access.hpp @@ -11,7 +11,6 @@ #include // for __SYCL2020_DEPRECATED #ifdef __SYCL_DEVICE_ONLY__ -#include #include #endif @@ -319,123 +318,6 @@ template struct remove_decoration { template using remove_decoration_t = typename remove_decoration::type; -namespace detail { -#ifdef __SYCL_DEVICE_ONLY__ -inline constexpr bool -address_space_cast_is_possible(access::address_space Src, - access::address_space Dst) { - // constant_space is unique and is not interchangeable with any other. - auto constant_space = access::address_space::constant_space; - if (Src == constant_space || Dst == constant_space) - return Src == Dst; - - auto generic_space = access::address_space::generic_space; - if (Src == Dst || Src == generic_space || Dst == generic_space) - return true; - - // global_host/global_device could be casted to/from global - auto global_space = access::address_space::global_space; - auto global_device = access::address_space::ext_intel_global_device_space; - auto global_host = access::address_space::ext_intel_global_host_space; - - if (Src == global_space || Dst == global_space) { - auto Other = Src == global_space ? Dst : Src; - if (Other == global_device || Other == global_host) - return true; - } - - // No more compatible combinations. - return false; -} - -template -auto static_address_cast(ElementType *Ptr) { - constexpr auto SrcAS = deduce_AS::value; - static_assert(address_space_cast_is_possible(SrcAS, Space)); - - using dst_type = typename DecoratedType< - std::remove_pointer_t>, Space>::type *; - - // Note: reinterpret_cast isn't enough for some of the casts between different - // address spaces, use C-style cast instead. - return (dst_type)Ptr; -} - -// Previous implementation (`castAS`, used in `multi_ptr` ctors among other -// places), used C-style cast instead of a proper dynamic check for some -// backends/spaces. `SupressNotImplementedAssert = true` parameter is emulating -// that previous behavior until the proper support is added for compatibility -// reasons. -template -auto dynamic_address_cast(ElementType *Ptr) { - constexpr auto generic_space = access::address_space::generic_space; - constexpr auto global_space = access::address_space::global_space; - constexpr auto local_space = access::address_space::local_space; - constexpr auto private_space = access::address_space::private_space; - constexpr auto global_device = - access::address_space::ext_intel_global_device_space; - constexpr auto global_host = - access::address_space::ext_intel_global_host_space; - - constexpr auto SrcAS = deduce_AS::value; - using dst_type = typename DecoratedType< - std::remove_pointer_t>, Space>::type *; - using RemoveCvT = std::remove_cv_t; - - if constexpr (!address_space_cast_is_possible(SrcAS, Space)) { - return (dst_type) nullptr; - } else if constexpr (Space == generic_space) { - return (dst_type)Ptr; - } else if constexpr (Space == global_space && - (SrcAS == global_device || SrcAS == global_host)) { - return (dst_type)Ptr; - } else if constexpr (SrcAS == global_space && - (Space == global_device || Space == global_host)) { -#if defined(__ENABLE_USM_ADDR_SPACE__) - static_assert(SupressNotImplementedAssert || Space != Space, - "Not supported yet!"); - return detail::static_address_cast(Ptr); -#else - // If __ENABLE_USM_ADDR_SPACE__ isn't defined then both - // global_device/global_host are just aliases for global_space. - static_assert(std::is_same_v); - return (dst_type)Ptr; -#endif - } else if constexpr (Space == global_space) { - return (dst_type)__spirv_GenericCastToPtrExplicit_ToGlobal( - const_cast(Ptr), __spv::StorageClass::CrossWorkgroup); - } else if constexpr (Space == local_space) { - return (dst_type)__spirv_GenericCastToPtrExplicit_ToLocal( - const_cast(Ptr), __spv::StorageClass::Workgroup); - } else if constexpr (Space == private_space) { - return (dst_type)__spirv_GenericCastToPtrExplicit_ToPrivate( - const_cast(Ptr), __spv::StorageClass::Function); -#if !defined(__ENABLE_USM_ADDR_SPACE__) - } else if constexpr (SrcAS == generic_space && - (Space == global_device || Space == global_host)) { - return (dst_type)__spirv_GenericCastToPtrExplicit_ToGlobal( - const_cast(Ptr), __spv::StorageClass::CrossWorkgroup); -#endif - } else { - static_assert(SupressNotImplementedAssert || Space != Space, - "Not supported yet!"); - return detail::static_address_cast(Ptr); - } -} -#else // __SYCL_DEVICE_ONLY__ -template -auto static_address_cast(ElementType *Ptr) { - return Ptr; -} -template -auto dynamic_address_cast(ElementType *Ptr) { - return Ptr; -} -#endif // __SYCL_DEVICE_ONLY__ -} // namespace detail - #undef __OPENCL_GLOBAL_AS__ #undef __OPENCL_GLOBAL_DEVICE_AS__ #undef __OPENCL_GLOBAL_HOST_AS__ diff --git a/sycl/include/sycl/detail/address_space_cast.hpp b/sycl/include/sycl/detail/address_space_cast.hpp new file mode 100644 index 0000000000000..3d878631900fd --- /dev/null +++ b/sycl/include/sycl/detail/address_space_cast.hpp @@ -0,0 +1,137 @@ +//==------- address_space_cast.hpp --- Implementation of AS casts ----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include + +#include + +namespace sycl { +inline namespace _V1 { + +namespace detail { +#ifdef __SYCL_DEVICE_ONLY__ +inline constexpr bool +address_space_cast_is_possible(access::address_space Src, + access::address_space Dst) { + // constant_space is unique and is not interchangeable with any other. + auto constant_space = access::address_space::constant_space; + if (Src == constant_space || Dst == constant_space) + return Src == Dst; + + auto generic_space = access::address_space::generic_space; + if (Src == Dst || Src == generic_space || Dst == generic_space) + return true; + + // global_host/global_device could be casted to/from global + auto global_space = access::address_space::global_space; + auto global_device = access::address_space::ext_intel_global_device_space; + auto global_host = access::address_space::ext_intel_global_host_space; + + if (Src == global_space || Dst == global_space) { + auto Other = Src == global_space ? Dst : Src; + if (Other == global_device || Other == global_host) + return true; + } + + // No more compatible combinations. + return false; +} + +template +auto static_address_cast(ElementType *Ptr) { + constexpr auto SrcAS = deduce_AS::value; + static_assert(address_space_cast_is_possible(SrcAS, Space)); + + using dst_type = typename DecoratedType< + std::remove_pointer_t>, Space>::type *; + + // Note: reinterpret_cast isn't enough for some of the casts between different + // address spaces, use C-style cast instead. + return (dst_type)Ptr; +} + +// Previous implementation (`castAS`, used in `multi_ptr` ctors among other +// places), used C-style cast instead of a proper dynamic check for some +// backends/spaces. `SupressNotImplementedAssert = true` parameter is emulating +// that previous behavior until the proper support is added for compatibility +// reasons. +template +auto dynamic_address_cast(ElementType *Ptr) { + constexpr auto generic_space = access::address_space::generic_space; + constexpr auto global_space = access::address_space::global_space; + constexpr auto local_space = access::address_space::local_space; + constexpr auto private_space = access::address_space::private_space; + constexpr auto global_device = + access::address_space::ext_intel_global_device_space; + constexpr auto global_host = + access::address_space::ext_intel_global_host_space; + + constexpr auto SrcAS = deduce_AS::value; + using dst_type = typename DecoratedType< + std::remove_pointer_t>, Space>::type *; + using RemoveCvT = std::remove_cv_t; + + if constexpr (!address_space_cast_is_possible(SrcAS, Space)) { + return (dst_type) nullptr; + } else if constexpr (Space == generic_space) { + return (dst_type)Ptr; + } else if constexpr (Space == global_space && + (SrcAS == global_device || SrcAS == global_host)) { + return (dst_type)Ptr; + } else if constexpr (SrcAS == global_space && + (Space == global_device || Space == global_host)) { +#if defined(__ENABLE_USM_ADDR_SPACE__) + static_assert(SupressNotImplementedAssert || Space != Space, + "Not supported yet!"); + return detail::static_address_cast(Ptr); +#else + // If __ENABLE_USM_ADDR_SPACE__ isn't defined then both + // global_device/global_host are just aliases for global_space. + static_assert(std::is_same_v); + return (dst_type)Ptr; +#endif + } else if constexpr (Space == global_space) { + return (dst_type)__spirv_GenericCastToPtrExplicit_ToGlobal( + const_cast(Ptr), __spv::StorageClass::CrossWorkgroup); + } else if constexpr (Space == local_space) { + return (dst_type)__spirv_GenericCastToPtrExplicit_ToLocal( + const_cast(Ptr), __spv::StorageClass::Workgroup); + } else if constexpr (Space == private_space) { + return (dst_type)__spirv_GenericCastToPtrExplicit_ToPrivate( + const_cast(Ptr), __spv::StorageClass::Function); +#if !defined(__ENABLE_USM_ADDR_SPACE__) + } else if constexpr (SrcAS == generic_space && + (Space == global_device || Space == global_host)) { + return (dst_type)__spirv_GenericCastToPtrExplicit_ToGlobal( + const_cast(Ptr), __spv::StorageClass::CrossWorkgroup); +#endif + } else { + static_assert(SupressNotImplementedAssert || Space != Space, + "Not supported yet!"); + return detail::static_address_cast(Ptr); + } +} +#else // __SYCL_DEVICE_ONLY__ +template +auto static_address_cast(ElementType *Ptr) { + return Ptr; +} +template +auto dynamic_address_cast(ElementType *Ptr) { + return Ptr; +} +#endif // __SYCL_DEVICE_ONLY__ +} // namespace detail + +} // namespace _V1 +} // namespace sycl diff --git a/sycl/include/sycl/detail/spirv.hpp b/sycl/include/sycl/detail/spirv.hpp index 541668ed374d9..cbdc1e7e0ffce 100644 --- a/sycl/include/sycl/detail/spirv.hpp +++ b/sycl/include/sycl/detail/spirv.hpp @@ -10,6 +10,13 @@ #ifdef __SYCL_DEVICE_ONLY__ +// Some __spirv_* inrinsics are automatically forward-declared by the compiler, +// but not all of them. For example: +// __spirv_AtomicStore(unsigned long long*, ...) +// Therefore, we need the following include to get forward-declarations of those +// versions. +#include + #include // for IdToMaskPosition #if defined(__NVPTX__) diff --git a/sycl/include/sycl/device_event.hpp b/sycl/include/sycl/device_event.hpp index e68662d731fa2..cf14d265bf71a 100644 --- a/sycl/include/sycl/device_event.hpp +++ b/sycl/include/sycl/device_event.hpp @@ -8,7 +8,6 @@ #pragma once -#include #include namespace sycl { @@ -31,7 +30,12 @@ class device_event { device_event(__ocl_event_t Event) : m_Event(Event) {} - void wait() { __spirv_GroupWaitEvents(__spv::Scope::Workgroup, 1, &m_Event); } + void wait() { + (void)m_Event; +#ifdef __SYCL_DEVICE_ONLY__ + __spirv_GroupWaitEvents(__spv::Scope::Workgroup, 1, &m_Event); +#endif + } }; } // namespace _V1 diff --git a/sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp b/sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp index 1bc5ae0b0b61b..d39b0b9660f41 100644 --- a/sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp +++ b/sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp @@ -19,6 +19,10 @@ #include #include +#ifdef __SYCL_DEVICE_ONLY__ +#include +#endif + #include namespace sycl { diff --git a/sycl/include/sycl/ext/oneapi/experimental/address_cast.hpp b/sycl/include/sycl/ext/oneapi/experimental/address_cast.hpp index 4708cd2224e19..484f3a5feb9a8 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/address_cast.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/address_cast.hpp @@ -8,7 +8,7 @@ #pragma once -#include +#include #include namespace sycl { diff --git a/sycl/include/sycl/ext/oneapi/experimental/bfloat16_math.hpp b/sycl/include/sycl/ext/oneapi/experimental/bfloat16_math.hpp index e6544010ad178..4a12191f74403 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/bfloat16_math.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/bfloat16_math.hpp @@ -8,6 +8,7 @@ #pragma once +#include #include // for sycl::bit_cast #include // for ceil, cos, exp, exp10, exp2 #include // sycl::detail::memcpy diff --git a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp index e6fdba4cdad45..52d21c1a28837 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp @@ -8,6 +8,7 @@ #pragma once +#include #include // for half #include // for to_vec2 #include // for __SYCL_ALWAYS_INLINE diff --git a/sycl/include/sycl/ext/oneapi/experimental/group_load_store.hpp b/sycl/include/sycl/ext/oneapi/experimental/group_load_store.hpp index 48542c8b66036..5ee3e1d1d0591 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/group_load_store.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/group_load_store.hpp @@ -10,6 +10,7 @@ #pragma once +#include #include #include #include diff --git a/sycl/include/sycl/ext/oneapi/experimental/prefetch.hpp b/sycl/include/sycl/ext/oneapi/experimental/prefetch.hpp index 29c25d6a0860b..a10ac625acfc4 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/prefetch.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/prefetch.hpp @@ -73,10 +73,10 @@ template void prefetch_impl(T *ptr, size_t bytes, Properties properties) { #ifdef __SYCL_DEVICE_ONLY__ auto *ptrGlobalAS = - reinterpret_cast<__attribute__((opencl_global)) const char *>( + reinterpret_cast<__attribute__((opencl_global)) const unsigned char *>( detail::static_address_cast( const_cast(ptr))); - const __attribute__((opencl_global)) char *ptrAnnotated = nullptr; + const __attribute__((opencl_global)) unsigned char *ptrAnnotated = nullptr; if constexpr (!properties.template has_property()) { ptrAnnotated = __builtin_intel_sycl_ptr_annotation( ptrGlobalAS, "sycl-prefetch-hint", static_cast(cache_level::L1)); diff --git a/sycl/include/sycl/group.hpp b/sycl/include/sycl/group.hpp index 50b5fe6e586ad..9402d2fb83d0e 100644 --- a/sycl/include/sycl/group.hpp +++ b/sycl/include/sycl/group.hpp @@ -8,7 +8,6 @@ #pragma once -#include // for __spirv_MemoryBarrier #include // for Scope, __ocl_event_t #include // for decorated, mode, addr... #include // for NDLoop, __SYCL_ASSERT @@ -286,11 +285,13 @@ template class __SYCL_TYPE(group) group { /// space, global address space or both based on the value of \p accessSpace. template void mem_fence( + [[maybe_unused]] typename std::enable_if_t accessSpace = access::fence_space::global_and_local) const { +#ifdef __SYCL_DEVICE_ONLY__ uint32_t flags = detail::getSPIRVMemorySemanticsMask(accessSpace); // TODO: currently, there is no good way in SPIR-V to set the memory // barrier only for load operations or only for store operations. @@ -301,6 +302,7 @@ template class __SYCL_TYPE(group) group { // we can fix this later, for example, by using OpenCL 1.2 functions // read_mem_fence() and write_mem_fence(). __spirv_MemoryBarrier(__spv::Scope::Workgroup, flags); +#endif } /// Asynchronously copies a number of elements specified by \p numElements @@ -310,15 +312,23 @@ template class __SYCL_TYPE(group) group { /// Permitted types for dataT are all scalar and vector types, except boolean. template __SYCL2020_DEPRECATED("Use decorated multi_ptr arguments instead") - std::enable_if_t::value, - device_event> async_work_group_copy(local_ptr dest, - global_ptr src, - size_t numElements, - size_t srcStride) const { - __ocl_event_t E = __SYCL_OpGroupAsyncCopyGlobalToLocal( + std::enable_if_t< + !detail::is_bool::value, + device_event> async_work_group_copy([[maybe_unused]] local_ptr + dest, + [[maybe_unused]] global_ptr + src, + [[maybe_unused]] size_t numElements, + [[maybe_unused]] size_t srcStride) + const { +#ifdef __SYCL_DEVICE_ONLY__ + __ocl_event_t E = __spirv_GroupAsyncCopy( __spv::Scope::Workgroup, detail::convertToOpenCLType(dest), detail::convertToOpenCLType(src), numElements, srcStride, 0); return device_event(E); +#else + return nullptr; +#endif } /// Asynchronously copies a number of elements specified by \p numElements @@ -328,16 +338,22 @@ template class __SYCL_TYPE(group) group { /// Permitted types for dataT are all scalar and vector types, except boolean. template __SYCL2020_DEPRECATED("Use decorated multi_ptr arguments instead") - std::enable_if_t::value, - device_event> async_work_group_copy(global_ptr dest, - local_ptr src, - size_t numElements, - size_t destStride) + std::enable_if_t< + !detail::is_bool::value, + device_event> async_work_group_copy([[maybe_unused]] global_ptr + dest, + [[maybe_unused]] local_ptr src, + [[maybe_unused]] size_t numElements, + [[maybe_unused]] size_t destStride) const { - __ocl_event_t E = __SYCL_OpGroupAsyncCopyLocalToGlobal( +#ifdef __SYCL_DEVICE_ONLY__ + __ocl_event_t E = __spirv_GroupAsyncCopy( __spv::Scope::Workgroup, detail::convertToOpenCLType(dest), detail::convertToOpenCLType(src), numElements, destStride, 0); return device_event(E); +#else + return nullptr; +#endif } /// Asynchronously copies a number of elements specified by \p numElements @@ -350,13 +366,18 @@ template class __SYCL_TYPE(group) group { std::enable_if_t::value && std::is_same_v, DestDataT>, device_event> - async_work_group_copy(decorated_local_ptr dest, - decorated_global_ptr src, size_t numElements, - size_t srcStride) const { - __ocl_event_t E = __SYCL_OpGroupAsyncCopyGlobalToLocal( + async_work_group_copy([[maybe_unused]] decorated_local_ptr dest, + [[maybe_unused]] decorated_global_ptr src, + [[maybe_unused]] size_t numElements, + [[maybe_unused]] size_t srcStride) const { +#ifdef __SYCL_DEVICE_ONLY__ + __ocl_event_t E = __spirv_GroupAsyncCopy( __spv::Scope::Workgroup, detail::convertToOpenCLType(dest), detail::convertToOpenCLType(src), numElements, srcStride, 0); return device_event(E); +#else + return nullptr; +#endif } /// Asynchronously copies a number of elements specified by \p numElements @@ -369,13 +390,18 @@ template class __SYCL_TYPE(group) group { std::enable_if_t::value && std::is_same_v, DestDataT>, device_event> - async_work_group_copy(decorated_global_ptr dest, - decorated_local_ptr src, size_t numElements, - size_t destStride) const { - __ocl_event_t E = __SYCL_OpGroupAsyncCopyLocalToGlobal( + async_work_group_copy([[maybe_unused]] decorated_global_ptr dest, + [[maybe_unused]] decorated_local_ptr src, + [[maybe_unused]] size_t numElements, + [[maybe_unused]] size_t destStride) const { +#ifdef __SYCL_DEVICE_ONLY__ + __ocl_event_t E = __spirv_GroupAsyncCopy( __spv::Scope::Workgroup, detail::convertToOpenCLType(dest), detail::convertToOpenCLType(src), numElements, destStride, 0); return device_event(E); +#else + return nullptr; +#endif } /// Specialization for scalar bool type. diff --git a/sycl/include/sycl/kernel_handler.hpp b/sycl/include/sycl/kernel_handler.hpp index 2e2ab51e9336b..6ae2a73132967 100644 --- a/sycl/include/sycl/kernel_handler.hpp +++ b/sycl/include/sycl/kernel_handler.hpp @@ -12,10 +12,6 @@ #include // for __SYCL_ALWAYS_INLINE #include // for feature_not_supported -#ifdef __SYCL_DEVICE_ONLY__ -#include -#endif - #include // for remove_reference_t #ifdef __SYCL_DEVICE_ONLY__ diff --git a/sycl/include/sycl/multi_ptr.hpp b/sycl/include/sycl/multi_ptr.hpp index b1946fb68afc4..824a1470b2c9b 100644 --- a/sycl/include/sycl/multi_ptr.hpp +++ b/sycl/include/sycl/multi_ptr.hpp @@ -8,9 +8,9 @@ #pragma once -#include // for __spirv_ocl_prefetch -#include // for address_space, decorated -#include // for half +#include // for address_space, decorated +#include // for half +#include #include // for __SYCL2020_DEPRECATED #include // for const_if_const_AS #include // for BIsRepresentationT diff --git a/sycl/include/sycl/nd_item.hpp b/sycl/include/sycl/nd_item.hpp index f1a5bfc63c917..76ec89f2bba70 100644 --- a/sycl/include/sycl/nd_item.hpp +++ b/sycl/include/sycl/nd_item.hpp @@ -8,7 +8,6 @@ #pragma once -#include // for __spirv_ControlBarrier #include // for Scope #include // for initLocalInvocationId #include // for mode, fence_space @@ -198,11 +197,13 @@ template class nd_item { get_offset()); } - void barrier(access::fence_space accessSpace = + void barrier([[maybe_unused]] access::fence_space accessSpace = access::fence_space::global_and_local) const { +#ifdef __SYCL_DEVICE_ONLY__ uint32_t flags = _V1::detail::getSPIRVMemorySemanticsMask(accessSpace); __spirv_ControlBarrier(__spv::Scope::Workgroup, __spv::Scope::Workgroup, flags); +#endif } /// Executes a work-group mem-fence with memory ordering on the local address @@ -210,11 +211,13 @@ template class nd_item { template __SYCL2020_DEPRECATED("use sycl::atomic_fence() free function instead") void mem_fence( + [[maybe_unused]] typename std::enable_if_t accessSpace = access::fence_space::global_and_local) const { +#if __SYCL_DEVICE_ONLY__ uint32_t flags = detail::getSPIRVMemorySemanticsMask(accessSpace); // TODO: currently, there is no good way in SPIR-V to set the memory // barrier only for load operations or only for store operations. @@ -225,6 +228,7 @@ template class nd_item { // we can fix this later, for example, by using OpenCL 1.2 functions // read_mem_fence() and write_mem_fence(). __spirv_MemoryBarrier(__spv::Scope::Workgroup, flags); +#endif } /// Asynchronously copies a number of elements specified by \p numElements @@ -234,18 +238,26 @@ template class nd_item { /// Permitted types for dataT are all scalar and vector types, except boolean. template __SYCL2020_DEPRECATED("Use decorated multi_ptr arguments instead") - std::enable_if_t::value, - device_event> async_work_group_copy(local_ptr dest, - global_ptr src, - size_t numElements, - size_t srcStride) const { + std::enable_if_t< + !detail::is_bool::value, + device_event> async_work_group_copy([[maybe_unused]] local_ptr + dest, + [[maybe_unused]] global_ptr + src, + [[maybe_unused]] size_t numElements, + [[maybe_unused]] size_t srcStride) + const { +#ifdef __SYCL_DEVICE_ONLY__ using DestT = detail::ConvertToOpenCLType_t; using SrcT = detail::ConvertToOpenCLType_t; - __ocl_event_t E = __SYCL_OpGroupAsyncCopyGlobalToLocal( - __spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()), - numElements, srcStride, 0); + __ocl_event_t E = + __spirv_GroupAsyncCopy(__spv::Scope::Workgroup, DestT(dest.get()), + SrcT(src.get()), numElements, srcStride, 0); return device_event(E); +#else + return nullptr; +#endif } /// Asynchronously copies a number of elements specified by \p numElements @@ -255,19 +267,25 @@ template class nd_item { /// Permitted types for dataT are all scalar and vector types, except boolean. template __SYCL2020_DEPRECATED("Use decorated multi_ptr arguments instead") - std::enable_if_t::value, - device_event> async_work_group_copy(global_ptr dest, - local_ptr src, - size_t numElements, - size_t destStride) + std::enable_if_t< + !detail::is_bool::value, + device_event> async_work_group_copy([[maybe_unused]] global_ptr + dest, + [[maybe_unused]] local_ptr src, + [[maybe_unused]] size_t numElements, + [[maybe_unused]] size_t destStride) const { +#ifdef __SYCL_DEVICE_ONLY__ using DestT = detail::ConvertToOpenCLType_t; using SrcT = detail::ConvertToOpenCLType_t; - __ocl_event_t E = __SYCL_OpGroupAsyncCopyLocalToGlobal( - __spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()), - numElements, destStride, 0); + __ocl_event_t E = + __spirv_GroupAsyncCopy(__spv::Scope::Workgroup, DestT(dest.get()), + SrcT(src.get()), numElements, destStride, 0); return device_event(E); +#else + return nullptr; +#endif } /// Asynchronously copies a number of elements specified by \p numElements @@ -280,16 +298,21 @@ template class nd_item { std::enable_if_t::value && std::is_same_v, DestDataT>, device_event> - async_work_group_copy(decorated_local_ptr dest, - decorated_global_ptr src, size_t numElements, - size_t srcStride) const { + async_work_group_copy([[maybe_unused]] decorated_local_ptr dest, + [[maybe_unused]] decorated_global_ptr src, + [[maybe_unused]] size_t numElements, + [[maybe_unused]] size_t srcStride) const { +#ifdef __SYCL_DEVICE_ONLY__ using DestT = detail::ConvertToOpenCLType_t; using SrcT = detail::ConvertToOpenCLType_t; - __ocl_event_t E = __SYCL_OpGroupAsyncCopyGlobalToLocal( - __spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()), - numElements, srcStride, 0); + __ocl_event_t E = + __spirv_GroupAsyncCopy(__spv::Scope::Workgroup, DestT(dest.get()), + SrcT(src.get()), numElements, srcStride, 0); return device_event(E); +#else + return nullptr; +#endif } /// Asynchronously copies a number of elements specified by \p numElements @@ -302,16 +325,21 @@ template class nd_item { std::enable_if_t::value && std::is_same_v, DestDataT>, device_event> - async_work_group_copy(decorated_global_ptr dest, - decorated_local_ptr src, size_t numElements, - size_t destStride) const { + async_work_group_copy([[maybe_unused]] decorated_global_ptr dest, + [[maybe_unused]] decorated_local_ptr src, + [[maybe_unused]] size_t numElements, + [[maybe_unused]] size_t destStride) const { +#ifdef __SYCL_DEVICE_ONLY__ using DestT = detail::ConvertToOpenCLType_t; using SrcT = detail::ConvertToOpenCLType_t; - __ocl_event_t E = __SYCL_OpGroupAsyncCopyLocalToGlobal( - __spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()), - numElements, destStride, 0); + __ocl_event_t E = + __spirv_GroupAsyncCopy(__spv::Scope::Workgroup, DestT(dest.get()), + SrcT(src.get()), numElements, destStride, 0); return device_event(E); +#else + return nullptr; +#endif } /// Specialization for scalar bool type. diff --git a/sycl/include/sycl/sub_group.hpp b/sycl/include/sycl/sub_group.hpp index a44f42c44f307..005b7746539f8 100644 --- a/sycl/include/sycl/sub_group.hpp +++ b/sycl/include/sycl/sub_group.hpp @@ -8,7 +8,9 @@ #pragma once -#include // for address_space, decorated +#include +#include // for address_space, decorated +#include #include // for __SYCL_DEPRECATED #include // for select_cl_scalar_inte... #include // for is_scalar_arithmetic diff --git a/sycl/test/extensions/address_cast_negative.cpp b/sycl/test/extensions/address_cast_negative.cpp index f325d283eed9e..01aafaf85530c 100644 --- a/sycl/test/extensions/address_cast_negative.cpp +++ b/sycl/test/extensions/address_cast_negative.cpp @@ -5,10 +5,10 @@ using namespace sycl::ext::oneapi::experimental; SYCL_EXTERNAL void test(int *p) { - // expected-error-re@sycl/access/access.hpp:* {{{{.*}}Not supported yet!}} + // expected-error-re@sycl/detail/address_space_cast.hpp:* {{{{.*}}Not supported yet!}} std::ignore = dynamic_address_cast< sycl::access::address_space::ext_intel_global_device_space>(p); - // expected-error-re@sycl/access/access.hpp:* {{{{.*}}Not supported yet!}} + // expected-error-re@sycl/detail/address_space_cast.hpp:* {{{{.*}}Not supported yet!}} std::ignore = dynamic_address_cast< sycl::access::address_space::ext_intel_global_host_space>(p); } diff --git a/sycl/test/include_deps/sycl_accessor.hpp.cpp b/sycl/test/include_deps/sycl_accessor.hpp.cpp index af86805dc146f..9df04c4d8d9b6 100644 --- a/sycl/test/include_deps/sycl_accessor.hpp.cpp +++ b/sycl/test/include_deps/sycl_accessor.hpp.cpp @@ -7,16 +7,16 @@ // CHECK-NEXT: accessor.hpp // CHECK-NEXT: access/access.hpp // CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: __spirv/spirv_ops.hpp +// CHECK-NEXT: atomic.hpp // CHECK-NEXT: __spirv/spirv_types.hpp // CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: atomic.hpp // CHECK-NEXT: detail/helpers.hpp +// CHECK-NEXT: detail/export.hpp // CHECK-NEXT: memory_enums.hpp // CHECK-NEXT: __spirv/spirv_vars.hpp // CHECK-NEXT: multi_ptr.hpp // CHECK-NEXT: aliases.hpp +// CHECK-NEXT: detail/address_space_cast.hpp // CHECK-NEXT: detail/type_traits.hpp // CHECK-NEXT: detail/type_traits/vec_marray_traits.hpp // CHECK-NEXT: half_type.hpp diff --git a/sycl/test/include_deps/sycl_buffer.hpp.cpp b/sycl/test/include_deps/sycl_buffer.hpp.cpp index 0c38ee6bde31c..0a821e1fe79c0 100644 --- a/sycl/test/include_deps/sycl_buffer.hpp.cpp +++ b/sycl/test/include_deps/sycl_buffer.hpp.cpp @@ -7,18 +7,17 @@ // CHECK-NEXT: buffer.hpp // CHECK-NEXT: access/access.hpp // CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: __spirv/spirv_ops.hpp -// CHECK-NEXT: __spirv/spirv_types.hpp -// CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: detail/export.hpp // CHECK-NEXT: backend_types.hpp // CHECK-NEXT: detail/array.hpp // CHECK-NEXT: exception.hpp +// CHECK-NEXT: detail/export.hpp // CHECK-NEXT: detail/string.hpp // CHECK-NEXT: detail/common.hpp // CHECK-NEXT: stl_wrappers/cassert // CHECK-NEXT: stl_wrappers/assert.h // CHECK-NEXT: __spirv/spirv_vars.hpp +// CHECK-NEXT: __spirv/spirv_types.hpp +// CHECK-NEXT: detail/defines.hpp // CHECK-NEXT: detail/helpers.hpp // CHECK-NEXT: memory_enums.hpp // CHECK-NEXT: detail/iostream_proxy.hpp diff --git a/sycl/test/include_deps/sycl_detail_core.hpp.cpp b/sycl/test/include_deps/sycl_detail_core.hpp.cpp index 1240a1e514056..6b5b40470350b 100644 --- a/sycl/test/include_deps/sycl_detail_core.hpp.cpp +++ b/sycl/test/include_deps/sycl_detail_core.hpp.cpp @@ -8,16 +8,16 @@ // CHECK-NEXT: accessor.hpp // CHECK-NEXT: access/access.hpp // CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: __spirv/spirv_ops.hpp +// CHECK-NEXT: atomic.hpp // CHECK-NEXT: __spirv/spirv_types.hpp // CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: atomic.hpp // CHECK-NEXT: detail/helpers.hpp +// CHECK-NEXT: detail/export.hpp // CHECK-NEXT: memory_enums.hpp // CHECK-NEXT: __spirv/spirv_vars.hpp // CHECK-NEXT: multi_ptr.hpp // CHECK-NEXT: aliases.hpp +// CHECK-NEXT: detail/address_space_cast.hpp // CHECK-NEXT: detail/type_traits.hpp // CHECK-NEXT: detail/type_traits/vec_marray_traits.hpp // CHECK-NEXT: half_type.hpp