diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 276044efab979..0289210031b07 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -913,7 +913,7 @@ class __SYCL_EXPORT handler { .template get_property< syclex::cuda::cluster_size_key>() .get_cluster_size(); - setKernelClusterLaunch(padRange(ClusterSize), ClusterDim); + setKernelClusterLaunch(ClusterSize); } } @@ -3718,7 +3718,9 @@ class __SYCL_EXPORT handler { void setKernelIsCooperative(bool); // Set using cuda thread block cluster launch flag and set the launch bounds. - void setKernelClusterLaunch(sycl::range<3> ClusterSize, int Dims); + void setKernelClusterLaunch(sycl::range<3> ClusterSize); + void setKernelClusterLaunch(sycl::range<2> ClusterSize); + void setKernelClusterLaunch(sycl::range<1> ClusterSize); // Set the request work group memory size (work_group_static ext). void setKernelWorkGroupMem(size_t Size); @@ -3819,54 +3821,37 @@ class __SYCL_EXPORT handler { bool HasAssociatedAccessor(detail::AccessorImplHost *Req, access::target AccessTarget) const; - template static sycl::range<3> padRange(sycl::range Range) { - if constexpr (Dims == 3) { - return Range; - } else { - sycl::range<3> Res{0, 0, 0}; - for (int I = 0; I < Dims; ++I) - Res[I] = Range[I]; - return Res; - } - } - - template static sycl::id<3> padId(sycl::id Id) { - if constexpr (Dims == 3) { - return Id; - } else { - sycl::id<3> Res{0, 0, 0}; - for (int I = 0; I < Dims; ++I) - Res[I] = Id[I]; - return Res; - } - } - template void setNDRangeDescriptor(sycl::range N, bool SetNumWorkGroups = false) { - return setNDRangeDescriptorPadded(padRange(N), SetNumWorkGroups, Dims); + return setNDRangeDescriptor(N, SetNumWorkGroups); } template void setNDRangeDescriptor(sycl::range NumWorkItems, sycl::id Offset) { - return setNDRangeDescriptorPadded(padRange(NumWorkItems), padId(Offset), - Dims); + return setNDRangeDescriptor(NumWorkItems, Offset); } template void setNDRangeDescriptor(sycl::nd_range ExecutionRange) { - return setNDRangeDescriptorPadded( - padRange(ExecutionRange.get_global_range()), - padRange(ExecutionRange.get_local_range()), - padId(ExecutionRange.get_offset()), Dims); - } - - void setNDRangeDescriptorPadded(sycl::range<3> N, bool SetNumWorkGroups, - int Dims); - void setNDRangeDescriptorPadded(sycl::range<3> NumWorkItems, - sycl::id<3> Offset, int Dims); - void setNDRangeDescriptorPadded(sycl::range<3> NumWorkItems, - sycl::range<3> LocalSize, sycl::id<3> Offset, - int Dims); + return setNDRangeDescriptor(ExecutionRange.get_global_range(), + ExecutionRange.get_local_range(), + ExecutionRange.get_offset()); + } + + void setNDRangeDescriptor(sycl::range<3> N, bool SetNumWorkGroups); + void setNDRangeDescriptor(sycl::range<3> NumWorkItems, sycl::id<3> Offset); + void setNDRangeDescriptor(sycl::range<3> NumWorkItems, + sycl::range<3> LocalSize, sycl::id<3> Offset); + + void setNDRangeDescriptor(sycl::range<2> N, bool SetNumWorkGroups); + void setNDRangeDescriptor(sycl::range<2> NumWorkItems, sycl::id<2> Offset); + void setNDRangeDescriptor(sycl::range<2> NumWorkItems, + sycl::range<2> LocalSize, sycl::id<2> Offset); + + void setNDRangeDescriptor(sycl::range<1> N, bool SetNumWorkGroups); + void setNDRangeDescriptor(sycl::range<1> NumWorkItems, sycl::id<1> Offset); + void setNDRangeDescriptor(sycl::range<1> NumWorkItems, + sycl::range<1> LocalSize, sycl::id<1> Offset); void setKernelInfo(void *KernelFuncPtr, int KernelNumArgs, detail::kernel_param_desc_t (*KernelParamDescGetter)(int), diff --git a/sycl/source/detail/cg.hpp b/sycl/source/detail/cg.hpp index d33d65c3b90e9..ce0aed954733c 100644 --- a/sycl/source/detail/cg.hpp +++ b/sycl/source/detail/cg.hpp @@ -62,99 +62,96 @@ class ArgDesc { // The structure represents NDRange - global, local sizes, global offset and // number of dimensions. -class NDRDescT { - // The method initializes all sizes for dimensions greater than the passed one - // to the default values, so they will not affect execution. - void setNDRangeLeftover() { - for (int I = Dims; I < 3; ++I) { - GlobalSize[I] = 1; - LocalSize[I] = LocalSize[0] ? 1 : 0; - GlobalOffset[I] = 0; - NumWorkGroups[I] = 0; - } - } - template static sycl::range<3> padRange(sycl::range Range) { - if constexpr (Dims == 3) { - return Range; - } else { - sycl::range<3> Res{0, 0, 0}; - for (int I = 0; I < Dims; ++I) - Res[I] = Range[I]; - return Res; - } - } - - template static sycl::id<3> padId(sycl::id Id) { - if constexpr (Dims == 3) { - return Id; - } else { - sycl::id<3> Res{0, 0, 0}; - for (int I = 0; I < Dims; ++I) - Res[I] = Id[I]; - return Res; - } - } +// TODO: A lot of tests rely on particular values to be set for dimensions that +// are not used. To clarify, for example, if a 2D kernel is invoked, in +// NDRDescT, the value of index 2 in GlobalSize must be set to either 1 or 0 +// depending on which constructor is used for no clear reason. +// Instead, only sensible defaults should be used and tests should be updated +// to reflect this. +class NDRDescT { public: NDRDescT() = default; NDRDescT(const NDRDescT &Desc) = default; NDRDescT(NDRDescT &&Desc) = default; - NDRDescT(sycl::range<3> N, bool SetNumWorkGroups, int DimsArg) - : GlobalSize{SetNumWorkGroups ? sycl::range<3>{0, 0, 0} : N}, - NumWorkGroups{SetNumWorkGroups ? N : sycl::range<3>{0, 0, 0}}, - Dims{size_t(DimsArg)} { - setNDRangeLeftover(); - } + template + NDRDescT(sycl::range N, bool SetNumWorkGroups) : Dims{size_t(Dims_)} { + if (SetNumWorkGroups) { + for (size_t I = 0; I < Dims_; ++I) { + NumWorkGroups[I] = N[I]; + } + } else { + for (size_t I = 0; I < Dims_; ++I) { + GlobalSize[I] = N[I]; + } - NDRDescT(sycl::range<3> NumWorkItems, sycl::range<3> LocalSize, - sycl::id<3> Offset, int DimsArg) - : GlobalSize{NumWorkItems}, LocalSize{LocalSize}, GlobalOffset{Offset}, - Dims{size_t(DimsArg)} { - setNDRangeLeftover(); + for (int I = Dims_; I < 3; ++I) { + GlobalSize[I] = 1; + } + } } - NDRDescT(sycl::range<3> NumWorkItems, sycl::id<3> Offset, int DimsArg) - : GlobalSize{NumWorkItems}, GlobalOffset{Offset}, Dims{size_t(DimsArg)} {} + template + NDRDescT(sycl::range NumWorkItems, sycl::range LocalSizes, + sycl::id Offset) + : Dims{size_t(Dims_)} { + for (size_t I = 0; I < Dims_; ++I) { + GlobalSize[I] = NumWorkItems[I]; + LocalSize[I] = LocalSizes[I]; + GlobalOffset[I] = Offset[I]; + } + + for (int I = Dims_; I < 3; ++I) { + LocalSize[I] = LocalSizes[0] ? 1 : 0; + } + + for (int I = Dims_; I < 3; ++I) { + GlobalSize[I] = 1; + } + } template - NDRDescT(sycl::nd_range ExecutionRange, int DimsArg) - : NDRDescT(padRange(ExecutionRange.get_global_range()), - padRange(ExecutionRange.get_local_range()), - padId(ExecutionRange.get_offset()), size_t(DimsArg)) { - setNDRangeLeftover(); + NDRDescT(sycl::range NumWorkItems, sycl::id Offset) + : Dims{size_t(Dims_)} { + for (size_t I = 0; I < Dims_; ++I) { + GlobalSize[I] = NumWorkItems[I]; + GlobalOffset[I] = Offset[I]; + } } template NDRDescT(sycl::nd_range ExecutionRange) - : NDRDescT(ExecutionRange, Dims_) {} + : NDRDescT(ExecutionRange.get_global_range(), + ExecutionRange.get_local_range(), + ExecutionRange.get_offset()) {} template NDRDescT(sycl::range Range) - : NDRDescT(padRange(Range), /*SetNumWorkGroups=*/false, Dims_) {} + : NDRDescT(Range, /*SetNumWorkGroups=*/false) {} - void setClusterDimensions(sycl::range<3> N, int Dims) { - if (this->Dims != size_t(Dims)) { + template void setClusterDimensions(sycl::range N) { + if (this->Dims != size_t(Dims_)) { throw std::runtime_error( "Dimensionality of cluster, global and local ranges must be same"); } - for (int I = 0; I < 3; ++I) - ClusterDimensions[I] = (I < Dims) ? N[I] : 1; + for (int I = 0; I < Dims_; ++I) + ClusterDimensions[I] = N[I]; } NDRDescT &operator=(const NDRDescT &Desc) = default; NDRDescT &operator=(NDRDescT &&Desc) = default; - sycl::range<3> GlobalSize{0, 0, 0}; - sycl::range<3> LocalSize{0, 0, 0}; - sycl::id<3> GlobalOffset{0, 0, 0}; + std::array GlobalSize{0, 0, 0}; + std::array LocalSize{0, 0, 0}; + std::array GlobalOffset{0, 0, 0}; /// Number of workgroups, used to record the number of workgroups from the /// simplest form of parallel_for_work_group. If set, all other fields must be /// zero - sycl::range<3> NumWorkGroups{0, 0, 0}; - sycl::range<3> ClusterDimensions{1, 1, 1}; + std::array NumWorkGroups{0, 0, 0}; + std::array ClusterDimensions{1, 1, 1}; size_t Dims = 0; }; diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index f159bf5e41832..d775728ab7bcd 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2276,8 +2276,11 @@ static void adjustNDRangePerKernel(NDRDescT &NDR, ur_kernel_handle_t Kernel, if (WGSize[0] == 0) { WGSize = {1, 1, 1}; } - NDR = sycl::detail::NDRDescT{nd_range<3>(NDR.NumWorkGroups * WGSize, WGSize), - static_cast(NDR.Dims)}; + + for (size_t I = 0; I < NDR.Dims; ++I) { + NDR.GlobalSize[I] = WGSize[I] * NDR.NumWorkGroups[I]; + NDR.LocalSize[I] = WGSize[I]; + } } // We have the following mapping between dimensions with SPIR-V builtins: diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index e6b646a1343e9..8345884af4c05 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -1067,6 +1067,10 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, const int Size, const size_t Index, size_t &IndexShift, bool IsKernelCreatedFromSource, bool IsESIMD) { using detail::kernel_param_kind_t; + size_t GlobalSize = impl->MNDRDesc.GlobalSize[0]; + for (size_t I = 1; I < impl->MNDRDesc.Dims; ++I) { + GlobalSize *= impl->MNDRDesc.GlobalSize[I]; + } switch (Kind) { case kernel_param_kind_t::kind_std_layout: @@ -1082,32 +1086,33 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, static_cast(&S->GlobalBuf); detail::AccessorImplPtr GBufImpl = detail::getSyclObjImpl(*GBufBase); detail::Requirement *GBufReq = GBufImpl.get(); - addArgsForGlobalAccessor( - GBufReq, Index, IndexShift, Size, IsKernelCreatedFromSource, - impl->MNDRDesc.GlobalSize.size(), impl->MArgs, IsESIMD); + addArgsForGlobalAccessor(GBufReq, Index, IndexShift, Size, + IsKernelCreatedFromSource, GlobalSize, impl->MArgs, + IsESIMD); ++IndexShift; detail::AccessorBaseHost *GOffsetBase = static_cast(&S->GlobalOffset); detail::AccessorImplPtr GOfssetImpl = detail::getSyclObjImpl(*GOffsetBase); detail::Requirement *GOffsetReq = GOfssetImpl.get(); - addArgsForGlobalAccessor( - GOffsetReq, Index, IndexShift, Size, IsKernelCreatedFromSource, - impl->MNDRDesc.GlobalSize.size(), impl->MArgs, IsESIMD); + addArgsForGlobalAccessor(GOffsetReq, Index, IndexShift, Size, + IsKernelCreatedFromSource, GlobalSize, impl->MArgs, + IsESIMD); ++IndexShift; detail::AccessorBaseHost *GFlushBase = static_cast(&S->GlobalFlushBuf); detail::AccessorImplPtr GFlushImpl = detail::getSyclObjImpl(*GFlushBase); detail::Requirement *GFlushReq = GFlushImpl.get(); - size_t GlobalSize = impl->MNDRDesc.GlobalSize.size(); // If work group size wasn't set explicitly then it must be recieved // from kernel attribute or set to default values. // For now we can't get this attribute here. // So we just suppose that WG size is always default for stream. // TODO adjust MNDRDesc when device image contains kernel's attribute if (GlobalSize == 0) { - // Suppose that work group size is 1 for every dimension - GlobalSize = impl->MNDRDesc.NumWorkGroups.size(); + GlobalSize = impl->MNDRDesc.NumWorkGroups[0]; + for (size_t I = 1; I < impl->MNDRDesc.Dims; ++I) { + GlobalSize *= impl->MNDRDesc.NumWorkGroups[I]; + } } addArgsForGlobalAccessor(GFlushReq, Index, IndexShift, Size, IsKernelCreatedFromSource, GlobalSize, impl->MArgs, @@ -1127,9 +1132,9 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, case access::target::device: case access::target::constant_buffer: { detail::Requirement *AccImpl = static_cast(Ptr); - addArgsForGlobalAccessor( - AccImpl, Index, IndexShift, Size, IsKernelCreatedFromSource, - impl->MNDRDesc.GlobalSize.size(), impl->MArgs, IsESIMD); + addArgsForGlobalAccessor(AccImpl, Index, IndexShift, Size, + IsKernelCreatedFromSource, GlobalSize, + impl->MArgs, IsESIMD); break; } case access::target::local: { @@ -2246,12 +2251,28 @@ void handler::setKernelIsCooperative(bool KernelIsCooperative) { impl->MKernelIsCooperative = KernelIsCooperative; } -void handler::setKernelClusterLaunch(sycl::range<3> ClusterSize, int Dims) { +void handler::setKernelClusterLaunch(sycl::range<3> ClusterSize) { + throwIfGraphAssociated< + syclex::detail::UnsupportedGraphFeatures:: + sycl_ext_oneapi_experimental_cuda_cluster_launch>(); + impl->MKernelUsesClusterLaunch = true; + impl->MNDRDesc.setClusterDimensions(ClusterSize); +} + +void handler::setKernelClusterLaunch(sycl::range<2> ClusterSize) { + throwIfGraphAssociated< + syclex::detail::UnsupportedGraphFeatures:: + sycl_ext_oneapi_experimental_cuda_cluster_launch>(); + impl->MKernelUsesClusterLaunch = true; + impl->MNDRDesc.setClusterDimensions(ClusterSize); +} + +void handler::setKernelClusterLaunch(sycl::range<1> ClusterSize) { throwIfGraphAssociated< syclex::detail::UnsupportedGraphFeatures:: sycl_ext_oneapi_experimental_cuda_cluster_launch>(); impl->MKernelUsesClusterLaunch = true; - impl->MNDRDesc.setClusterDimensions(ClusterSize, Dims); + impl->MNDRDesc.setClusterDimensions(ClusterSize); } void handler::setKernelWorkGroupMem(size_t Size) { @@ -2406,18 +2427,43 @@ bool handler::HasAssociatedAccessor(detail::AccessorImplHost *Req, void handler::setType(sycl::detail::CGType Type) { impl->MCGType = Type; } sycl::detail::CGType handler::getType() const { return impl->MCGType; } -void handler::setNDRangeDescriptorPadded(sycl::range<3> N, - bool SetNumWorkGroups, int Dims) { - impl->MNDRDesc = NDRDescT{N, SetNumWorkGroups, Dims}; +void handler::setNDRangeDescriptor(sycl::range<3> N, bool SetNumWorkGroups) { + impl->MNDRDesc = NDRDescT{N, SetNumWorkGroups}; +} +void handler::setNDRangeDescriptor(sycl::range<3> NumWorkItems, + sycl::id<3> Offset) { + impl->MNDRDesc = NDRDescT{NumWorkItems, Offset}; +} +void handler::setNDRangeDescriptor(sycl::range<3> NumWorkItems, + sycl::range<3> LocalSize, + sycl::id<3> Offset) { + impl->MNDRDesc = NDRDescT{NumWorkItems, LocalSize, Offset}; +} + +void handler::setNDRangeDescriptor(sycl::range<2> N, bool SetNumWorkGroups) { + impl->MNDRDesc = NDRDescT{N, SetNumWorkGroups}; +} +void handler::setNDRangeDescriptor(sycl::range<2> NumWorkItems, + sycl::id<2> Offset) { + impl->MNDRDesc = NDRDescT{NumWorkItems, Offset}; +} +void handler::setNDRangeDescriptor(sycl::range<2> NumWorkItems, + sycl::range<2> LocalSize, + sycl::id<2> Offset) { + impl->MNDRDesc = NDRDescT{NumWorkItems, LocalSize, Offset}; +} + +void handler::setNDRangeDescriptor(sycl::range<1> N, bool SetNumWorkGroups) { + impl->MNDRDesc = NDRDescT{N, SetNumWorkGroups}; } -void handler::setNDRangeDescriptorPadded(sycl::range<3> NumWorkItems, - sycl::id<3> Offset, int Dims) { - impl->MNDRDesc = NDRDescT{NumWorkItems, Offset, Dims}; +void handler::setNDRangeDescriptor(sycl::range<1> NumWorkItems, + sycl::id<1> Offset) { + impl->MNDRDesc = NDRDescT{NumWorkItems, Offset}; } -void handler::setNDRangeDescriptorPadded(sycl::range<3> NumWorkItems, - sycl::range<3> LocalSize, - sycl::id<3> Offset, int Dims) { - impl->MNDRDesc = NDRDescT{NumWorkItems, LocalSize, Offset, Dims}; +void handler::setNDRangeDescriptor(sycl::range<1> NumWorkItems, + sycl::range<1> LocalSize, + sycl::id<1> Offset) { + impl->MNDRDesc = NDRDescT{NumWorkItems, LocalSize, Offset}; } void handler::setKernelNameBasedCachePtr( diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 3c953567c12dd..d69eba05eab3c 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3571,6 +3571,15 @@ _ZN4sycl3_V17handler20associateWithHandlerEPNS0_6detail28SampledImageAccessorBas _ZN4sycl3_V17handler20associateWithHandlerEPNS0_6detail30UnsampledImageAccessorBaseHostENS0_12image_targetE _ZN4sycl3_V17handler20memcpyToDeviceGlobalEPKvS3_bmm _ZN4sycl3_V17handler20setKernelCacheConfigENS1_23StableKernelCacheConfigE +_ZN4sycl3_V17handler20setNDRangeDescriptorENS0_5rangeILi1EEENS0_2idILi1EEE +_ZN4sycl3_V17handler20setNDRangeDescriptorENS0_5rangeILi1EEES3_NS0_2idILi1EEE +_ZN4sycl3_V17handler20setNDRangeDescriptorENS0_5rangeILi1EEEb +_ZN4sycl3_V17handler20setNDRangeDescriptorENS0_5rangeILi2EEENS0_2idILi2EEE +_ZN4sycl3_V17handler20setNDRangeDescriptorENS0_5rangeILi2EEES3_NS0_2idILi2EEE +_ZN4sycl3_V17handler20setNDRangeDescriptorENS0_5rangeILi2EEEb +_ZN4sycl3_V17handler20setNDRangeDescriptorENS0_5rangeILi3EEENS0_2idILi3EEE +_ZN4sycl3_V17handler20setNDRangeDescriptorENS0_5rangeILi3EEES3_NS0_2idILi3EEE +_ZN4sycl3_V17handler20setNDRangeDescriptorENS0_5rangeILi3EEEb _ZN4sycl3_V17handler20setStateSpecConstSetEv _ZN4sycl3_V17handler21setKernelWorkGroupMemEm _ZN4sycl3_V17handler21setUserFacingNodeTypeENS0_3ext6oneapi12experimental9node_typeE @@ -3578,7 +3587,9 @@ _ZN4sycl3_V17handler22ext_oneapi_fill2d_implEPvmPKvmmm _ZN4sycl3_V17handler22memcpyFromDeviceGlobalEPvPKvbmm _ZN4sycl3_V17handler22setHandlerKernelBundleENS0_6kernelE _ZN4sycl3_V17handler22setHandlerKernelBundleERKSt10shared_ptrINS0_6detail18kernel_bundle_implEE -_ZN4sycl3_V17handler22setKernelClusterLaunchENS0_5rangeILi3EEEi +_ZN4sycl3_V17handler22setKernelClusterLaunchENS0_5rangeILi1EEE +_ZN4sycl3_V17handler22setKernelClusterLaunchENS0_5rangeILi2EEE +_ZN4sycl3_V17handler22setKernelClusterLaunchENS0_5rangeILi3EEE _ZN4sycl3_V17handler22setKernelIsCooperativeEb _ZN4sycl3_V17handler23instantiateKernelOnHostEPv _ZN4sycl3_V17handler24GetRangeRoundingSettingsERmS2_S2_ @@ -3590,9 +3601,6 @@ _ZN4sycl3_V17handler24registerDynamicParameterERNS0_3ext6oneapi12experimental6de _ZN4sycl3_V17handler25ext_intel_write_host_pipeENS0_6detail11string_viewEPvmb _ZN4sycl3_V17handler26associateWithHandlerCommonESt10shared_ptrINS0_6detail16AccessorImplHostEEi _ZN4sycl3_V17handler26setKernelNameBasedCachePtrEPNS0_6detail21KernelNameBasedCacheTE -_ZN4sycl3_V17handler26setNDRangeDescriptorPaddedENS0_5rangeILi3EEENS0_2idILi3EEEi -_ZN4sycl3_V17handler26setNDRangeDescriptorPaddedENS0_5rangeILi3EEES3_NS0_2idILi3EEEi -_ZN4sycl3_V17handler26setNDRangeDescriptorPaddedENS0_5rangeILi3EEEbi _ZN4sycl3_V17handler27addLifetimeSharedPtrStorageESt10shared_ptrIKvE _ZN4sycl3_V17handler27computeFallbackKernelBoundsEmm _ZN4sycl3_V17handler28extractArgsAndReqsFromLambdaEPcPFNS0_6detail19kernel_param_desc_tEiEmb diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 7f96e48b36d47..322d98322d117 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -4401,15 +4401,23 @@ ?setHandlerKernelBundle@handler@_V1@sycl@@AEAAXAEBV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@@Z ?setHandlerKernelBundle@handler@_V1@sycl@@AEAAXVkernel@23@@Z ?setKernelCacheConfig@handler@_V1@sycl@@AEAAXW4StableKernelCacheConfig@123@@Z -?setKernelClusterLaunch@handler@_V1@sycl@@AEAAXV?$range@$02@23@H@Z +?setKernelClusterLaunch@handler@_V1@sycl@@AEAAXV?$range@$00@23@@Z +?setKernelClusterLaunch@handler@_V1@sycl@@AEAAXV?$range@$01@23@@Z +?setKernelClusterLaunch@handler@_V1@sycl@@AEAAXV?$range@$02@23@@Z ?setKernelInfo@handler@_V1@sycl@@AEAAXPEAXHP6A?AUkernel_param_desc_t@detail@23@H@Z_N2@Z ?setKernelIsCooperative@handler@_V1@sycl@@AEAAX_N@Z ?setKernelNameBasedCachePtr@handler@_V1@sycl@@AEAAXPEAUKernelNameBasedCacheT@detail@23@@Z ?setKernelWorkGroupMem@handler@_V1@sycl@@AEAAX_K@Z ?setLocalAccessorArgHelper@handler@_V1@sycl@@AEAAXHAEAVLocalAccessorBaseHost@detail@23@@Z -?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@0V?$id@$02@23@H@Z -?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@V?$id@$02@23@H@Z -?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@_NH@Z +?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$00@23@0V?$id@$00@23@@Z +?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$00@23@V?$id@$00@23@@Z +?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$00@23@_N@Z +?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$01@23@0V?$id@$01@23@@Z +?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$01@23@V?$id@$01@23@@Z +?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$01@23@_N@Z +?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$02@23@0V?$id@$02@23@@Z +?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$02@23@V?$id@$02@23@@Z +?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$02@23@_N@Z ?setNDRangeUsed@handler@_V1@sycl@@AEAAX_N@Z ?setStateExplicitKernelBundle@handler@_V1@sycl@@AEAAXXZ ?setStateSpecConstSet@handler@_V1@sycl@@AEAAXXZ