From b959782adadcccdd56023fba33f34e6f35396496 Mon Sep 17 00:00:00 2001 From: Duncan Brawley Date: Fri, 30 May 2025 12:44:21 +0100 Subject: [PATCH 01/17] [SYCL] Optimize NDRDescT by removing sycl::range, sycl::id and padding sycl::range and sycl::id perform validity checks every time setting them. Use std::array instead as dimensions should already be valid. In addition, remove explicitly padding dimensions smaller than 3 and get number of dimensions from template argument instead of function argument. --- sycl/include/sycl/handler.hpp | 65 ++++++-------- sycl/source/detail/cg.hpp | 100 +++++++++------------- sycl/source/detail/scheduler/commands.cpp | 11 ++- sycl/source/handler.cpp | 62 +++++++++++--- sycl/test/abi/sycl_symbols_linux.dump | 38 ++++---- sycl/test/abi/sycl_symbols_windows.dump | 56 ++++++------ 6 files changed, 178 insertions(+), 154 deletions(-) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 180766f874661..791501b25593b 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -900,7 +900,7 @@ class __SYCL_EXPORT handler { .template get_property< syclex::cuda::cluster_size_key>() .get_cluster_size(); - setKernelClusterLaunch(padRange(ClusterSize), ClusterDim); + setKernelClusterLaunch(ClusterSize); } } @@ -3692,7 +3692,9 @@ class __SYCL_EXPORT handler { void setKernelIsCooperative(bool); // Set using cuda thread block cluster launch flag and set the launch bounds. - void setKernelClusterLaunch(sycl::range<3> ClusterSize, int Dims); + void setKernelClusterLaunch(sycl::range<3> ClusterSize); + void setKernelClusterLaunch(sycl::range<2> ClusterSize); + void setKernelClusterLaunch(sycl::range<1> ClusterSize); // Set the request work group memory size (work_group_static ext). void setKernelWorkGroupMem(size_t Size); @@ -3793,54 +3795,37 @@ class __SYCL_EXPORT handler { bool HasAssociatedAccessor(detail::AccessorImplHost *Req, access::target AccessTarget) const; - template static sycl::range<3> padRange(sycl::range Range) { - if constexpr (Dims == 3) { - return Range; - } else { - sycl::range<3> Res{0, 0, 0}; - for (int I = 0; I < Dims; ++I) - Res[I] = Range[I]; - return Res; - } - } - - template static sycl::id<3> padId(sycl::id Id) { - if constexpr (Dims == 3) { - return Id; - } else { - sycl::id<3> Res{0, 0, 0}; - for (int I = 0; I < Dims; ++I) - Res[I] = Id[I]; - return Res; - } - } - template void setNDRangeDescriptor(sycl::range N, bool SetNumWorkGroups = false) { - return setNDRangeDescriptorPadded(padRange(N), SetNumWorkGroups, Dims); + return setNDRangeDescriptor(N, SetNumWorkGroups); } template void setNDRangeDescriptor(sycl::range NumWorkItems, sycl::id Offset) { - return setNDRangeDescriptorPadded(padRange(NumWorkItems), padId(Offset), - Dims); + return setNDRangeDescriptor(NumWorkItems, Offset); } template void setNDRangeDescriptor(sycl::nd_range ExecutionRange) { - return setNDRangeDescriptorPadded( - padRange(ExecutionRange.get_global_range()), - padRange(ExecutionRange.get_local_range()), - padId(ExecutionRange.get_offset()), Dims); - } - - void setNDRangeDescriptorPadded(sycl::range<3> N, bool SetNumWorkGroups, - int Dims); - void setNDRangeDescriptorPadded(sycl::range<3> NumWorkItems, - sycl::id<3> Offset, int Dims); - void setNDRangeDescriptorPadded(sycl::range<3> NumWorkItems, - sycl::range<3> LocalSize, sycl::id<3> Offset, - int Dims); + return setNDRangeDescriptor(ExecutionRange.get_global_range(), + ExecutionRange.get_local_range(), + ExecutionRange.get_offset()); + } + + void setNDRangeDescriptor(sycl::range<3> N, bool SetNumWorkGroups); + void setNDRangeDescriptor(sycl::range<3> NumWorkItems, sycl::id<3> Offset); + void setNDRangeDescriptor(sycl::range<3> NumWorkItems, + sycl::range<3> LocalSize, sycl::id<3> Offset); + + void setNDRangeDescriptor(sycl::range<2> N, bool SetNumWorkGroups); + void setNDRangeDescriptor(sycl::range<2> NumWorkItems, sycl::id<2> Offset); + void setNDRangeDescriptor(sycl::range<2> NumWorkItems, + sycl::range<2> LocalSize, sycl::id<2> Offset); + + void setNDRangeDescriptor(sycl::range<1> N, bool SetNumWorkGroups); + void setNDRangeDescriptor(sycl::range<1> NumWorkItems, sycl::id<1> Offset); + void setNDRangeDescriptor(sycl::range<1> NumWorkItems, + sycl::range<1> LocalSize, sycl::id<1> Offset); void setKernelInfo(void *KernelFuncPtr, int KernelNumArgs, detail::kernel_param_desc_t (*KernelParamDescGetter)(int), diff --git a/sycl/source/detail/cg.hpp b/sycl/source/detail/cg.hpp index ccd546cd968f1..132b220fbd699 100644 --- a/sycl/source/detail/cg.hpp +++ b/sycl/source/detail/cg.hpp @@ -63,98 +63,76 @@ class ArgDesc { // The structure represents NDRange - global, local sizes, global offset and // number of dimensions. class NDRDescT { - // The method initializes all sizes for dimensions greater than the passed one - // to the default values, so they will not affect execution. - void setNDRangeLeftover() { - for (int I = Dims; I < 3; ++I) { - GlobalSize[I] = 1; - LocalSize[I] = LocalSize[0] ? 1 : 0; - GlobalOffset[I] = 0; - NumWorkGroups[I] = 0; - } - } - - template static sycl::range<3> padRange(sycl::range Range) { - if constexpr (Dims == 3) { - return Range; - } else { - sycl::range<3> Res{0, 0, 0}; - for (int I = 0; I < Dims; ++I) - Res[I] = Range[I]; - return Res; - } - } - - template static sycl::id<3> padId(sycl::id Id) { - if constexpr (Dims == 3) { - return Id; - } else { - sycl::id<3> Res{0, 0, 0}; - for (int I = 0; I < Dims; ++I) - Res[I] = Id[I]; - return Res; - } - } public: NDRDescT() = default; NDRDescT(const NDRDescT &Desc) = default; NDRDescT(NDRDescT &&Desc) = default; - NDRDescT(sycl::range<3> N, bool SetNumWorkGroups, int DimsArg) - : GlobalSize{SetNumWorkGroups ? sycl::range<3>{0, 0, 0} : N}, - NumWorkGroups{SetNumWorkGroups ? N : sycl::range<3>{0, 0, 0}}, - Dims{size_t(DimsArg)} { - setNDRangeLeftover(); + template + NDRDescT(sycl::range N, bool SetNumWorkGroups) : Dims{size_t(Dims_)} { + if (SetNumWorkGroups) { + for (size_t I = 0; I < Dims_; ++I) { + NumWorkGroups[I] = N[I]; + } + } else { + for (size_t I = 0; I < Dims_; ++I) { + GlobalSize[I] = N[I]; + } + } } - NDRDescT(sycl::range<3> NumWorkItems, sycl::range<3> LocalSize, - sycl::id<3> Offset, int DimsArg) - : GlobalSize{NumWorkItems}, LocalSize{LocalSize}, GlobalOffset{Offset}, - Dims{size_t(DimsArg)} { - setNDRangeLeftover(); + template + NDRDescT(sycl::range NumWorkItems, sycl::range LocalSizes, + sycl::id Offset) + : Dims{size_t(Dims_)} { + for (size_t I = 0; I < Dims_; ++I) { + GlobalSize[I] = NumWorkItems[I]; + LocalSize[I] = LocalSizes[I]; + GlobalOffset[I] = Offset[I]; + } } - NDRDescT(sycl::range<3> NumWorkItems, sycl::id<3> Offset, int DimsArg) - : GlobalSize{NumWorkItems}, GlobalOffset{Offset}, Dims{size_t(DimsArg)} {} - template - NDRDescT(sycl::nd_range ExecutionRange, int DimsArg) - : NDRDescT(padRange(ExecutionRange.get_global_range()), - padRange(ExecutionRange.get_local_range()), - padId(ExecutionRange.get_offset()), size_t(DimsArg)) { - setNDRangeLeftover(); + NDRDescT(sycl::range NumWorkItems, sycl::id Offset) + : Dims{size_t(Dims_)} { + for (size_t I = 0; I < Dims_; ++I) { + GlobalSize[I] = NumWorkItems[I]; + GlobalOffset[I] = Offset[I]; + } } template NDRDescT(sycl::nd_range ExecutionRange) - : NDRDescT(ExecutionRange, Dims_) {} + : NDRDescT(ExecutionRange.get_global_range(), + ExecutionRange.get_local_range(), + ExecutionRange.get_offset()) {} template NDRDescT(sycl::range Range) - : NDRDescT(padRange(Range), /*SetNumWorkGroups=*/false, Dims_) {} + : NDRDescT(Range, /*SetNumWorkGroups=*/false) {} - void setClusterDimensions(sycl::range<3> N, int Dims) { - if (this->Dims != size_t(Dims)) { + template void setClusterDimensions(sycl::range N) { + if (this->Dims != size_t(Dims_)) { throw std::runtime_error( "Dimensionality of cluster, global and local ranges must be same"); } - for (int I = 0; I < 3; ++I) - ClusterDimensions[I] = (I < Dims) ? N[I] : 1; + for (int I = 0; I < Dims_; ++I) + ClusterDimensions[I] = N[I]; } NDRDescT &operator=(const NDRDescT &Desc) = default; NDRDescT &operator=(NDRDescT &&Desc) = default; - sycl::range<3> GlobalSize{0, 0, 0}; - sycl::range<3> LocalSize{0, 0, 0}; - sycl::id<3> GlobalOffset{0, 0, 0}; + std::array GlobalSize{0, 0, 0}; + std::array LocalSize{0, 0, 0}; + std::array GlobalOffset{0, 0, 0}; /// Number of workgroups, used to record the number of workgroups from the /// simplest form of parallel_for_work_group. If set, all other fields must be /// zero - sycl::range<3> NumWorkGroups{0, 0, 0}; - sycl::range<3> ClusterDimensions{1, 1, 1}; + std::array NumWorkGroups{0, 0, 0}; + std::array ClusterDimensions{1, 1, 1}; size_t Dims = 0; }; diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index d2530d7018813..f8625c8470ab0 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2270,8 +2270,15 @@ static void adjustNDRangePerKernel(NDRDescT &NDR, ur_kernel_handle_t Kernel, if (WGSize[0] == 0) { WGSize = {1, 1, 1}; } - NDR = sycl::detail::NDRDescT{nd_range<3>(NDR.NumWorkGroups * WGSize, WGSize), - static_cast(NDR.Dims)}; + + for (size_t I = 0; I < NDR.Dims; ++I) { + WGSize[I] *= NDR.NumWorkGroups[I]; + } + + for (size_t I = 0; I < NDR.Dims; ++I) { + NDR.GlobalSize[I] = WGSize[I]; + NDR.LocalSize[I] = WGSize[I]; + } } // We have the following mapping between dimensions with SPIR-V builtins: diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 1c295fbd90c77..0d4ed57137777 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -2204,12 +2204,28 @@ void handler::setKernelIsCooperative(bool KernelIsCooperative) { impl->MKernelIsCooperative = KernelIsCooperative; } -void handler::setKernelClusterLaunch(sycl::range<3> ClusterSize, int Dims) { +void handler::setKernelClusterLaunch(sycl::range<3> ClusterSize) { throwIfGraphAssociated< syclex::detail::UnsupportedGraphFeatures:: sycl_ext_oneapi_experimental_cuda_cluster_launch>(); impl->MKernelUsesClusterLaunch = true; - impl->MNDRDesc.setClusterDimensions(ClusterSize, Dims); + impl->MNDRDesc.setClusterDimensions(ClusterSize); +} + +void handler::setKernelClusterLaunch(sycl::range<2> ClusterSize) { + throwIfGraphAssociated< + syclex::detail::UnsupportedGraphFeatures:: + sycl_ext_oneapi_experimental_cuda_cluster_launch>(); + impl->MKernelUsesClusterLaunch = true; + impl->MNDRDesc.setClusterDimensions(ClusterSize); +} + +void handler::setKernelClusterLaunch(sycl::range<1> ClusterSize) { + throwIfGraphAssociated< + syclex::detail::UnsupportedGraphFeatures:: + sycl_ext_oneapi_experimental_cuda_cluster_launch>(); + impl->MKernelUsesClusterLaunch = true; + impl->MNDRDesc.setClusterDimensions(ClusterSize); } void handler::setKernelWorkGroupMem(size_t Size) { @@ -2359,18 +2375,40 @@ bool handler::HasAssociatedAccessor(detail::AccessorImplHost *Req, void handler::setType(sycl::detail::CGType Type) { impl->MCGType = Type; } sycl::detail::CGType handler::getType() const { return impl->MCGType; } -void handler::setNDRangeDescriptorPadded(sycl::range<3> N, - bool SetNumWorkGroups, int Dims) { - impl->MNDRDesc = NDRDescT{N, SetNumWorkGroups, Dims}; +void handler::setNDRangeDescriptor(sycl::range<3> N, bool SetNumWorkGroups) { + impl->MNDRDesc = NDRDescT{N, SetNumWorkGroups}; +} +void handler::setNDRangeDescriptor(sycl::range<3> NumWorkItems, + sycl::id<3> Offset) { + impl->MNDRDesc = NDRDescT{NumWorkItems, Offset}; +} +void handler::setNDRangeDescriptor(sycl::range<3> NumWorkItems, + sycl::range<3> LocalSize, sycl::id<3> Offset) { + impl->MNDRDesc = NDRDescT{NumWorkItems, LocalSize, Offset}; +} + +void handler::setNDRangeDescriptor(sycl::range<2> N, bool SetNumWorkGroups) { + impl->MNDRDesc = NDRDescT{N, SetNumWorkGroups}; +} +void handler::setNDRangeDescriptor(sycl::range<2> NumWorkItems, + sycl::id<2> Offset) { + impl->MNDRDesc = NDRDescT{NumWorkItems, Offset}; +} +void handler::setNDRangeDescriptor(sycl::range<2> NumWorkItems, + sycl::range<2> LocalSize, sycl::id<2> Offset) { + impl->MNDRDesc = NDRDescT{NumWorkItems, LocalSize, Offset}; +} + +void handler::setNDRangeDescriptor(sycl::range<1> N, bool SetNumWorkGroups) { + impl->MNDRDesc = NDRDescT{N, SetNumWorkGroups}; } -void handler::setNDRangeDescriptorPadded(sycl::range<3> NumWorkItems, - sycl::id<3> Offset, int Dims) { - impl->MNDRDesc = NDRDescT{NumWorkItems, Offset, Dims}; +void handler::setNDRangeDescriptor(sycl::range<1> NumWorkItems, + sycl::id<1> Offset) { + impl->MNDRDesc = NDRDescT{NumWorkItems, Offset}; } -void handler::setNDRangeDescriptorPadded(sycl::range<3> NumWorkItems, - sycl::range<3> LocalSize, - sycl::id<3> Offset, int Dims) { - impl->MNDRDesc = NDRDescT{NumWorkItems, LocalSize, Offset, Dims}; +void handler::setNDRangeDescriptor(sycl::range<1> NumWorkItems, + sycl::range<1> LocalSize, sycl::id<1> Offset) { + impl->MNDRDesc = NDRDescT{NumWorkItems, LocalSize, Offset}; } void handler::setKernelNameBasedCachePtr( diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index c822e0f4ec78d..7aada0e384ec9 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3088,8 +3088,10 @@ _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_base11updateValu _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_base14updateAccessorEPKNS0_6detail16AccessorBaseHostE _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_baseC1ENS3_13command_graphILNS3_11graph_stateE0EEE _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_baseC1ENS3_13command_graphILNS3_11graph_stateE0EEEmPKv +_ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_baseC1ERKSt10shared_ptrINS4_22dynamic_parameter_implEE _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_baseC2ENS3_13command_graphILNS3_11graph_stateE0EEE _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_baseC2ENS3_13command_graphILNS3_11graph_stateE0EEEmPKv +_ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_baseC2ERKSt10shared_ptrINS4_22dynamic_parameter_implEE _ZN4sycl3_V13ext6oneapi12experimental6detail24executable_command_graph12finalizeImplEv _ZN4sycl3_V13ext6oneapi12experimental6detail24executable_command_graph6updateERKNS3_13command_graphILNS3_11graph_stateE0EEE _ZN4sycl3_V13ext6oneapi12experimental6detail24executable_command_graph6updateERKNS3_4nodeE @@ -3111,6 +3113,12 @@ _ZN4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graphC1ERKNS0_5 _ZN4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graphC1ERKNS0_7contextERKNS0_6deviceERKNS0_13property_listE _ZN4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graphC2ERKNS0_5queueERKNS0_13property_listE _ZN4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graphC2ERKNS0_7contextERKNS0_6deviceERKNS0_13property_listE +_ZN4sycl3_V13ext6oneapi12experimental6detail27dynamic_local_accessor_base19updateLocalAccessorENS0_5rangeILi3EEE +_ZN4sycl3_V13ext6oneapi12experimental6detail27dynamic_local_accessor_baseC1ENS0_5rangeILi3EEEiiRKNS0_13property_listE +_ZN4sycl3_V13ext6oneapi12experimental6detail27dynamic_local_accessor_baseC2ENS0_5rangeILi3EEEiiRKNS0_13property_listE +_ZN4sycl3_V13ext6oneapi12experimental6detail30dynamic_work_group_memory_base18updateWorkGroupMemEm +_ZN4sycl3_V13ext6oneapi12experimental6detail30dynamic_work_group_memory_baseC1ENS3_13command_graphILNS3_11graph_stateE0EEEm +_ZN4sycl3_V13ext6oneapi12experimental6detail30dynamic_work_group_memory_baseC2ENS3_13command_graphILNS3_11graph_stateE0EEEm _ZN4sycl3_V13ext6oneapi12experimental6detail30make_kernel_bundle_from_sourceERKNS0_7contextENS3_15source_languageENS0_6detail11string_viewESt6vectorISt4pairISA_SA_ESaISD_EE _ZN4sycl3_V13ext6oneapi12experimental6detail30make_kernel_bundle_from_sourceERKNS0_7contextENS3_15source_languageERKSt6vectorISt4byteSaISA_EES9_ISt4pairINS0_6detail11string_viewESH_ESaISI_EE _ZN4sycl3_V13ext6oneapi12experimental6memcpyENS0_5queueEPvPKvmRKNS0_6detail13code_locationE @@ -3154,13 +3162,11 @@ _ZN4sycl3_V15queue20wait_and_throw_proxyERKNS0_6detail13code_locationE _ZN4sycl3_V15queue22memcpyFromDeviceGlobalEPvPKvbmmRKSt6vectorINS0_5eventESaIS6_EE _ZN4sycl3_V15queue22submit_with_event_implERKNS0_6detail19type_erased_cgfo_tyERKNS2_14SubmissionInfoERKNS2_13code_locationEb _ZN4sycl3_V15queue22submit_with_event_implERKNS0_6detail19type_erased_cgfo_tyERKNS2_2v114SubmissionInfoERKNS2_13code_locationEb -_ZNK4sycl3_V15queue22submit_with_event_implERKNS0_6detail19type_erased_cgfo_tyERKNS2_2v114SubmissionInfoERKNS2_13code_locationEb _ZN4sycl3_V15queue22submit_with_event_implESt8functionIFvRNS0_7handlerEEERKNS0_6detail14SubmissionInfoERKNS7_13code_locationEb _ZN4sycl3_V15queue25ext_oneapi_submit_barrierERKNS0_6detail13code_locationE _ZN4sycl3_V15queue25ext_oneapi_submit_barrierERKSt6vectorINS0_5eventESaIS3_EERKNS0_6detail13code_locationE _ZN4sycl3_V15queue25submit_without_event_implERKNS0_6detail19type_erased_cgfo_tyERKNS2_14SubmissionInfoERKNS2_13code_locationEb _ZN4sycl3_V15queue25submit_without_event_implERKNS0_6detail19type_erased_cgfo_tyERKNS2_2v114SubmissionInfoERKNS2_13code_locationEb -_ZNK4sycl3_V15queue25submit_without_event_implERKNS0_6detail19type_erased_cgfo_tyERKNS2_2v114SubmissionInfoERKNS2_13code_locationEb _ZN4sycl3_V15queue25submit_without_event_implESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationE _ZN4sycl3_V15queue25submit_without_event_implESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationEb _ZN4sycl3_V15queue25submit_without_event_implESt8functionIFvRNS0_7handlerEEERKNS0_6detail14SubmissionInfoERKNS7_13code_locationEb @@ -3522,16 +3528,8 @@ _ZN4sycl3_V17handler11saveCodeLocENS0_6detail13code_locationE _ZN4sycl3_V17handler11saveCodeLocENS0_6detail13code_locationEb _ZN4sycl3_V17handler11storeRawArgEPKvm _ZN4sycl3_V17handler12addReductionERKSt10shared_ptrIKvE -_ZN4sycl3_V13ext6oneapi12experimental6detail30dynamic_work_group_memory_base18updateWorkGroupMemEm -_ZN4sycl3_V13ext6oneapi12experimental6detail30dynamic_work_group_memory_baseC1ENS3_13command_graphILNS3_11graph_stateE0EEEm -_ZN4sycl3_V13ext6oneapi12experimental6detail27dynamic_local_accessor_baseC2ENS0_5rangeILi3EEEiiRKNS0_13property_listE -_ZN4sycl3_V13ext6oneapi12experimental6detail30dynamic_work_group_memory_baseC2ENS3_13command_graphILNS3_11graph_stateE0EEEm -_ZN4sycl3_V13ext6oneapi12experimental6detail27dynamic_local_accessor_baseC1ENS0_5rangeILi3EEEiiRKNS0_13property_listE -_ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_baseC1ERKSt10shared_ptrINS4_22dynamic_parameter_implEE -_ZN4sycl3_V13ext6oneapi12experimental6detail27dynamic_local_accessor_base19updateLocalAccessorENS0_5rangeILi3EEE -_ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_baseC2ERKSt10shared_ptrINS4_22dynamic_parameter_implEE -_ZN4sycl3_V17handler12setArgHelperEiRNS0_6detail22work_group_memory_implE _ZN4sycl3_V17handler12setArgHelperEiONS0_6streamE +_ZN4sycl3_V17handler12setArgHelperEiRNS0_6detail22work_group_memory_implE _ZN4sycl3_V17handler13getKernelNameEv _ZN4sycl3_V17handler13setKernelInfoEPviPFNS0_6detail19kernel_param_desc_tEiEbb _ZN4sycl3_V17handler14addAccessorReqESt10shared_ptrINS0_6detail16AccessorImplHostEE @@ -3566,6 +3564,15 @@ _ZN4sycl3_V17handler20associateWithHandlerEPNS0_6detail28SampledImageAccessorBas _ZN4sycl3_V17handler20associateWithHandlerEPNS0_6detail30UnsampledImageAccessorBaseHostENS0_12image_targetE _ZN4sycl3_V17handler20memcpyToDeviceGlobalEPKvS3_bmm _ZN4sycl3_V17handler20setKernelCacheConfigENS1_23StableKernelCacheConfigE +_ZN4sycl3_V17handler20setNDRangeDescriptorENS0_5rangeILi1EEENS0_2idILi1EEE +_ZN4sycl3_V17handler20setNDRangeDescriptorENS0_5rangeILi1EEES3_NS0_2idILi1EEE +_ZN4sycl3_V17handler20setNDRangeDescriptorENS0_5rangeILi1EEEb +_ZN4sycl3_V17handler20setNDRangeDescriptorENS0_5rangeILi2EEENS0_2idILi2EEE +_ZN4sycl3_V17handler20setNDRangeDescriptorENS0_5rangeILi2EEES3_NS0_2idILi2EEE +_ZN4sycl3_V17handler20setNDRangeDescriptorENS0_5rangeILi2EEEb +_ZN4sycl3_V17handler20setNDRangeDescriptorENS0_5rangeILi3EEENS0_2idILi3EEE +_ZN4sycl3_V17handler20setNDRangeDescriptorENS0_5rangeILi3EEES3_NS0_2idILi3EEE +_ZN4sycl3_V17handler20setNDRangeDescriptorENS0_5rangeILi3EEEb _ZN4sycl3_V17handler20setStateSpecConstSetEv _ZN4sycl3_V17handler21setKernelWorkGroupMemEm _ZN4sycl3_V17handler21setUserFacingNodeTypeENS0_3ext6oneapi12experimental9node_typeE @@ -3573,7 +3580,9 @@ _ZN4sycl3_V17handler22ext_oneapi_fill2d_implEPvmPKvmmm _ZN4sycl3_V17handler22memcpyFromDeviceGlobalEPvPKvbmm _ZN4sycl3_V17handler22setHandlerKernelBundleENS0_6kernelE _ZN4sycl3_V17handler22setHandlerKernelBundleERKSt10shared_ptrINS0_6detail18kernel_bundle_implEE -_ZN4sycl3_V17handler22setKernelClusterLaunchENS0_5rangeILi3EEEi +_ZN4sycl3_V17handler22setKernelClusterLaunchENS0_5rangeILi1EEE +_ZN4sycl3_V17handler22setKernelClusterLaunchENS0_5rangeILi2EEE +_ZN4sycl3_V17handler22setKernelClusterLaunchENS0_5rangeILi3EEE _ZN4sycl3_V17handler22setKernelIsCooperativeEb _ZN4sycl3_V17handler23instantiateKernelOnHostEPv _ZN4sycl3_V17handler24GetRangeRoundingSettingsERmS2_S2_ @@ -3585,9 +3594,6 @@ _ZN4sycl3_V17handler24registerDynamicParameterERNS0_3ext6oneapi12experimental6de _ZN4sycl3_V17handler25ext_intel_write_host_pipeENS0_6detail11string_viewEPvmb _ZN4sycl3_V17handler26associateWithHandlerCommonESt10shared_ptrINS0_6detail16AccessorImplHostEEi _ZN4sycl3_V17handler26setKernelNameBasedCachePtrEPNS0_6detail21KernelNameBasedCacheTE -_ZN4sycl3_V17handler26setNDRangeDescriptorPaddedENS0_5rangeILi3EEENS0_2idILi3EEEi -_ZN4sycl3_V17handler26setNDRangeDescriptorPaddedENS0_5rangeILi3EEES3_NS0_2idILi3EEEi -_ZN4sycl3_V17handler26setNDRangeDescriptorPaddedENS0_5rangeILi3EEEbi _ZN4sycl3_V17handler27addLifetimeSharedPtrStorageESt10shared_ptrIKvE _ZN4sycl3_V17handler27computeFallbackKernelBoundsEmm _ZN4sycl3_V17handler28extractArgsAndReqsFromLambdaEPcPFNS0_6detail19kernel_param_desc_tEiEmb @@ -3717,6 +3723,8 @@ _ZNK4sycl3_V15queue16get_backend_infoINS0_4info6device7versionEEENS0_6detail20is _ZNK4sycl3_V15queue16get_backend_infoINS0_4info8platform7versionEEENS0_6detail20is_backend_info_descIT_E11return_typeEv _ZNK4sycl3_V15queue20ext_oneapi_get_graphEv _ZNK4sycl3_V15queue20ext_oneapi_get_stateEv +_ZNK4sycl3_V15queue22submit_with_event_implERKNS0_6detail19type_erased_cgfo_tyERKNS2_2v114SubmissionInfoERKNS2_13code_locationEb +_ZNK4sycl3_V15queue25submit_without_event_implERKNS0_6detail19type_erased_cgfo_tyERKNS2_2v114SubmissionInfoERKNS2_13code_locationEb _ZNK4sycl3_V15queue28ext_codeplay_supports_fusionEv _ZNK4sycl3_V15queue30ext_oneapi_get_last_event_implEv _ZNK4sycl3_V15queue3getEv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index ed718ca0da207..05131408f1d94 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -308,12 +308,10 @@ ??0SubmissionInfo@detail@_V1@sycl@@QEAA@$$QEAV0123@@Z ??0SubmissionInfo@detail@_V1@sycl@@QEAA@AEBV0123@@Z ??0SubmissionInfo@detail@_V1@sycl@@QEAA@XZ +??0SubmissionInfo@v1@detail@_V1@sycl@@QEAA@$$QEAV01234@@Z ??0SubmissionInfo@v1@detail@_V1@sycl@@QEAA@AEBV01234@@Z ??0SubmissionInfo@v1@detail@_V1@sycl@@QEAA@AEBV0234@@Z ??0SubmissionInfo@v1@detail@_V1@sycl@@QEAA@XZ -??0SubmissionInfo@v1@detail@_V1@sycl@@QEAA@$$QEAV01234@@Z -??4SubmissionInfo@v1@detail@_V1@sycl@@QEAAAEAV01234@AEBV01234@@Z -??4SubmissionInfo@v1@detail@_V1@sycl@@QEAAAEAV01234@$$QEAV01234@@Z ??0UnsampledImageAccessorBaseHost@detail@_V1@sycl@@IEAA@AEBV?$shared_ptr@VUnsampledImageAccessorImplHost@detail@_V1@sycl@@@std@@@Z ??0UnsampledImageAccessorBaseHost@detail@_V1@sycl@@QEAA@$$QEAV0123@@Z ??0UnsampledImageAccessorBaseHost@detail@_V1@sycl@@QEAA@AEBV0123@@Z @@ -362,11 +360,20 @@ ??0dynamic_command_group@experimental@oneapi@ext@_V1@sycl@@QEAA@$$QEAV012345@@Z ??0dynamic_command_group@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBV012345@@Z ??0dynamic_command_group@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBV?$command_graph@$0A@@12345@AEBV?$vector@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@V?$allocator@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@@2@@std@@@Z +??0dynamic_local_accessor_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@$$QEAV0123456@@Z +??0dynamic_local_accessor_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBV0123456@@Z +??0dynamic_local_accessor_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@V?$range@$02@56@HHAEBVproperty_list@56@@Z +??0dynamic_local_accessor_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ ??0dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@$$QEAV0123456@@Z ??0dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBV0123456@@Z +??0dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBV?$shared_ptr@Vdynamic_parameter_impl@detail@experimental@oneapi@ext@_V1@sycl@@@std@@@Z ??0dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@V?$command_graph@$0A@@23456@@Z ??0dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@V?$command_graph@$0A@@23456@_KPEBX@Z ??0dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ +??0dynamic_work_group_memory_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@$$QEAV0123456@@Z +??0dynamic_work_group_memory_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBV0123456@@Z +??0dynamic_work_group_memory_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@V?$command_graph@$0A@@23456@_K@Z +??0dynamic_work_group_memory_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ ??0event@_V1@sycl@@AEAA@V?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@std@@@Z ??0event@_V1@sycl@@QEAA@$$QEAV012@@Z ??0event@_V1@sycl@@QEAA@AEBV012@@Z @@ -514,7 +521,9 @@ ??1device_image_plain@detail@_V1@sycl@@QEAA@XZ ??1device_selector@_V1@sycl@@UEAA@XZ ??1dynamic_command_group@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ +??1dynamic_local_accessor_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ ??1dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ +??1dynamic_work_group_memory_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ ??1event@_V1@sycl@@QEAA@XZ ??1exception@_V1@sycl@@UEAA@XZ ??1exception_list@_V1@sycl@@QEAA@XZ @@ -579,6 +588,8 @@ ??4SampledImageAccessorBaseHost@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4SubmissionInfo@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z ??4SubmissionInfo@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z +??4SubmissionInfo@v1@detail@_V1@sycl@@QEAAAEAV01234@$$QEAV01234@@Z +??4SubmissionInfo@v1@detail@_V1@sycl@@QEAAAEAV01234@AEBV01234@@Z ??4UnsampledImageAccessorBaseHost@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z ??4UnsampledImageAccessorBaseHost@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4accelerator_selector@_V1@sycl@@QEAAAEAV012@$$QEAV012@@Z @@ -598,8 +609,12 @@ ??4device_selector@_V1@sycl@@QEAAAEAV012@AEBV012@@Z ??4dynamic_command_group@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV012345@$$QEAV012345@@Z ??4dynamic_command_group@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV012345@AEBV012345@@Z +??4dynamic_local_accessor_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV0123456@$$QEAV0123456@@Z +??4dynamic_local_accessor_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV0123456@AEBV0123456@@Z ??4dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV0123456@$$QEAV0123456@@Z ??4dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV0123456@AEBV0123456@@Z +??4dynamic_work_group_memory_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV0123456@$$QEAV0123456@@Z +??4dynamic_work_group_memory_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV0123456@AEBV0123456@@Z ??4event@_V1@sycl@@QEAAAEAV012@$$QEAV012@@Z ??4event@_V1@sycl@@QEAAAEAV012@AEBV012@@Z ??4exception@_V1@sycl@@QEAAAEAV012@AEBV012@@Z @@ -4379,15 +4394,23 @@ ?setHandlerKernelBundle@handler@_V1@sycl@@AEAAXAEBV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@@Z ?setHandlerKernelBundle@handler@_V1@sycl@@AEAAXVkernel@23@@Z ?setKernelCacheConfig@handler@_V1@sycl@@AEAAXW4StableKernelCacheConfig@123@@Z -?setKernelClusterLaunch@handler@_V1@sycl@@AEAAXV?$range@$02@23@H@Z +?setKernelClusterLaunch@handler@_V1@sycl@@AEAAXV?$range@$00@23@@Z +?setKernelClusterLaunch@handler@_V1@sycl@@AEAAXV?$range@$01@23@@Z +?setKernelClusterLaunch@handler@_V1@sycl@@AEAAXV?$range@$02@23@@Z ?setKernelInfo@handler@_V1@sycl@@AEAAXPEAXHP6A?AUkernel_param_desc_t@detail@23@H@Z_N2@Z ?setKernelIsCooperative@handler@_V1@sycl@@AEAAX_N@Z ?setKernelNameBasedCachePtr@handler@_V1@sycl@@AEAAXPEAUKernelNameBasedCacheT@detail@23@@Z ?setKernelWorkGroupMem@handler@_V1@sycl@@AEAAX_K@Z ?setLocalAccessorArgHelper@handler@_V1@sycl@@AEAAXHAEAVLocalAccessorBaseHost@detail@23@@Z -?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@0V?$id@$02@23@H@Z -?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@V?$id@$02@23@H@Z -?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@_NH@Z +?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$00@23@0V?$id@$00@23@@Z +?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$00@23@V?$id@$00@23@@Z +?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$00@23@_N@Z +?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$01@23@0V?$id@$01@23@@Z +?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$01@23@V?$id@$01@23@@Z +?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$01@23@_N@Z +?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$02@23@0V?$id@$02@23@@Z +?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$02@23@V?$id@$02@23@@Z +?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$02@23@_N@Z ?setNDRangeUsed@handler@_V1@sycl@@AEAAX_N@Z ?setStateExplicitKernelBundle@handler@_V1@sycl@@AEAAXXZ ?setStateSpecConstSet@handler@_V1@sycl@@AEAAXXZ @@ -4431,10 +4454,10 @@ ?submit_with_event_impl@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBVSubmissionInfo@detail@23@AEBUcode_location@823@_N@Z ?submit_with_event_impl@queue@_V1@sycl@@AEBA?AVevent@23@AEBVtype_erased_cgfo_ty@detail@23@AEBVSubmissionInfo@v1@623@AEBUcode_location@623@_N@Z ?submit_without_event_impl@queue@_V1@sycl@@AEAAXAEBVtype_erased_cgfo_ty@detail@23@AEBVSubmissionInfo@523@AEBUcode_location@523@_N@Z +?submit_without_event_impl@queue@_V1@sycl@@AEAAXAEBVtype_erased_cgfo_ty@detail@23@AEBVSubmissionInfo@v1@523@AEBUcode_location@523@_N@Z ?submit_without_event_impl@queue@_V1@sycl@@AEAAXV?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBUcode_location@detail@23@@Z ?submit_without_event_impl@queue@_V1@sycl@@AEAAXV?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBUcode_location@detail@23@_N@Z ?submit_without_event_impl@queue@_V1@sycl@@AEAAXV?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBVSubmissionInfo@detail@23@AEBUcode_location@723@_N@Z -?submit_without_event_impl@queue@_V1@sycl@@AEAAXAEBVtype_erased_cgfo_ty@detail@23@AEBVSubmissionInfo@v1@523@AEBUcode_location@523@_N@Z ?submit_without_event_impl@queue@_V1@sycl@@AEBAXAEBVtype_erased_cgfo_ty@detail@23@AEBVSubmissionInfo@v1@523@AEBUcode_location@523@_N@Z ?supportsUSMFill2D@handler@_V1@sycl@@AEAA_NXZ ?supportsUSMMemcpy2D@handler@_V1@sycl@@AEAA_NXZ @@ -4454,25 +4477,10 @@ ?update@executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAXAEBV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@@Z ?update@executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAXAEBVnode@34567@@Z ?updateAccessor@dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@IEAAXPEBVAccessorBaseHost@267@@Z +?updateLocalAccessor@dynamic_local_accessor_base@detail@experimental@oneapi@ext@_V1@sycl@@IEAAXV?$range@$02@67@@Z ?updateValue@dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@IEAAXPEBVraw_kernel_arg@34567@_K@Z ?updateValue@dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@IEAAXPEBX_K@Z -??4dynamic_work_group_memory_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV0123456@AEBV0123456@@Z -??0dynamic_local_accessor_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@V?$range@$02@56@HHAEBVproperty_list@56@@Z -??0dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBV?$shared_ptr@Vdynamic_parameter_impl@detail@experimental@oneapi@ext@_V1@sycl@@@std@@@Z -??0dynamic_work_group_memory_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@$$QEAV0123456@@Z -??4dynamic_local_accessor_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV0123456@$$QEAV0123456@@Z -??4dynamic_local_accessor_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV0123456@AEBV0123456@@Z -??0dynamic_work_group_memory_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ -??0dynamic_work_group_memory_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@V?$command_graph@$0A@@23456@_K@Z -?updateLocalAccessor@dynamic_local_accessor_base@detail@experimental@oneapi@ext@_V1@sycl@@IEAAXV?$range@$02@67@@Z -??0dynamic_local_accessor_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@$$QEAV0123456@@Z -??4dynamic_work_group_memory_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV0123456@$$QEAV0123456@@Z -??0dynamic_local_accessor_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBV0123456@@Z -??1dynamic_local_accessor_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ ?updateWorkGroupMem@dynamic_work_group_memory_base@detail@experimental@oneapi@ext@_V1@sycl@@IEAAX_K@Z -??0dynamic_local_accessor_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ -??1dynamic_work_group_memory_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ -??0dynamic_work_group_memory_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBV0123456@@Z ?use_kernel_bundle@handler@_V1@sycl@@QEAAXAEBV?$kernel_bundle@$01@23@@Z ?verifyDeviceHasProgressGuarantee@handler@_V1@sycl@@AEAAXW4forward_progress_guarantee@experimental@oneapi@ext@23@W4execution_scope@56723@1@Z ?verifyReductionProps@detail@_V1@sycl@@YAXAEBVproperty_list@23@@Z From 520e446f54285b02387acfd36f4b0775e8b2a489 Mon Sep 17 00:00:00 2001 From: Duncan Brawley Date: Fri, 6 Jun 2025 16:48:35 +0100 Subject: [PATCH 02/17] Format code --- sycl/source/handler.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 0d4ed57137777..e14198d89772a 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -2383,7 +2383,8 @@ void handler::setNDRangeDescriptor(sycl::range<3> NumWorkItems, impl->MNDRDesc = NDRDescT{NumWorkItems, Offset}; } void handler::setNDRangeDescriptor(sycl::range<3> NumWorkItems, - sycl::range<3> LocalSize, sycl::id<3> Offset) { + sycl::range<3> LocalSize, + sycl::id<3> Offset) { impl->MNDRDesc = NDRDescT{NumWorkItems, LocalSize, Offset}; } @@ -2395,7 +2396,8 @@ void handler::setNDRangeDescriptor(sycl::range<2> NumWorkItems, impl->MNDRDesc = NDRDescT{NumWorkItems, Offset}; } void handler::setNDRangeDescriptor(sycl::range<2> NumWorkItems, - sycl::range<2> LocalSize, sycl::id<2> Offset) { + sycl::range<2> LocalSize, + sycl::id<2> Offset) { impl->MNDRDesc = NDRDescT{NumWorkItems, LocalSize, Offset}; } @@ -2407,7 +2409,8 @@ void handler::setNDRangeDescriptor(sycl::range<1> NumWorkItems, impl->MNDRDesc = NDRDescT{NumWorkItems, Offset}; } void handler::setNDRangeDescriptor(sycl::range<1> NumWorkItems, - sycl::range<1> LocalSize, sycl::id<1> Offset) { + sycl::range<1> LocalSize, + sycl::id<1> Offset) { impl->MNDRDesc = NDRDescT{NumWorkItems, LocalSize, Offset}; } From 907717c0e21e147558ee35e31bd8a79ac9e62734 Mon Sep 17 00:00:00 2001 From: Duncan Brawley Date: Wed, 11 Jun 2025 15:58:14 +0100 Subject: [PATCH 03/17] Improve modification of NDRDescT in adjustNDRangePerKernel --- sycl/source/detail/scheduler/commands.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index f8625c8470ab0..6347e8a889af4 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2272,12 +2272,13 @@ static void adjustNDRangePerKernel(NDRDescT &NDR, ur_kernel_handle_t Kernel, } for (size_t I = 0; I < NDR.Dims; ++I) { - WGSize[I] *= NDR.NumWorkGroups[I]; - } - - for (size_t I = 0; I < NDR.Dims; ++I) { - NDR.GlobalSize[I] = WGSize[I]; + NDR.GlobalSize[I] = WGSize[I] * NDR.NumWorkGroups[I]; NDR.LocalSize[I] = WGSize[I]; + + // nvm below does not help + //NDR.NumWorkGroups = {0, 0, 0}; + //NDR.GlobalOffset = {0, 0, 0}; + //NDR.ClusterDimensions = {1, 1, 1}; } } From adafe3dff99cd1a06855ae1d1a8e3d99fcf9d795 Mon Sep 17 00:00:00 2001 From: Duncan Brawley Date: Wed, 11 Jun 2025 15:59:41 +0100 Subject: [PATCH 04/17] Fix bug when setting LocalSize by preserving old behaviour of setting extra dimensions to zero or one respectively weather LocalSizes is zero or not respectively --- sycl/source/detail/cg.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sycl/source/detail/cg.hpp b/sycl/source/detail/cg.hpp index 132b220fbd699..17dd7c8b99e07 100644 --- a/sycl/source/detail/cg.hpp +++ b/sycl/source/detail/cg.hpp @@ -91,6 +91,10 @@ class NDRDescT { LocalSize[I] = LocalSizes[I]; GlobalOffset[I] = Offset[I]; } + + for (int I = Dims_; I < 3; ++I) { + LocalSize[I] = LocalSizes[0] ? 1 : 0; + } } template From ef58ba76b971e27bb5a6fc7df74bee57379f0516 Mon Sep 17 00:00:00 2001 From: Duncan Brawley Date: Wed, 11 Jun 2025 16:05:36 +0100 Subject: [PATCH 05/17] Format and remove mistakenly committed code --- sycl/source/detail/cg.hpp | 2 +- sycl/source/detail/scheduler/commands.cpp | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/sycl/source/detail/cg.hpp b/sycl/source/detail/cg.hpp index 17dd7c8b99e07..5c72dd0756a10 100644 --- a/sycl/source/detail/cg.hpp +++ b/sycl/source/detail/cg.hpp @@ -91,7 +91,7 @@ class NDRDescT { LocalSize[I] = LocalSizes[I]; GlobalOffset[I] = Offset[I]; } - + for (int I = Dims_; I < 3; ++I) { LocalSize[I] = LocalSizes[0] ? 1 : 0; } diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 6347e8a889af4..4e2fab60dfae1 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2274,11 +2274,6 @@ static void adjustNDRangePerKernel(NDRDescT &NDR, ur_kernel_handle_t Kernel, for (size_t I = 0; I < NDR.Dims; ++I) { NDR.GlobalSize[I] = WGSize[I] * NDR.NumWorkGroups[I]; NDR.LocalSize[I] = WGSize[I]; - - // nvm below does not help - //NDR.NumWorkGroups = {0, 0, 0}; - //NDR.GlobalOffset = {0, 0, 0}; - //NDR.ClusterDimensions = {1, 1, 1}; } } From 7d4175f261bee823fde9e1a059c475a1873d1896 Mon Sep 17 00:00:00 2001 From: Duncan Brawley Date: Thu, 12 Jun 2025 15:26:40 +0100 Subject: [PATCH 06/17] Fix issues with .size() being called on std::array when previously was called on sycl::range --- sycl/source/handler.cpp | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 84f7656573826..256c024190c94 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -1027,6 +1027,10 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, const int Size, const size_t Index, size_t &IndexShift, bool IsKernelCreatedFromSource, bool IsESIMD) { using detail::kernel_param_kind_t; + size_t GlobalSize = impl->MNDRDesc.GlobalSize[0]; + for (int I = 1; I < impl->MNDRDesc.Dims; ++I) { + GlobalSize *= impl->MNDRDesc.GlobalSize[I]; + } switch (Kind) { case kernel_param_kind_t::kind_std_layout: @@ -1042,32 +1046,33 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, static_cast(&S->GlobalBuf); detail::AccessorImplPtr GBufImpl = detail::getSyclObjImpl(*GBufBase); detail::Requirement *GBufReq = GBufImpl.get(); - addArgsForGlobalAccessor( - GBufReq, Index, IndexShift, Size, IsKernelCreatedFromSource, - impl->MNDRDesc.GlobalSize.size(), impl->MArgs, IsESIMD); + addArgsForGlobalAccessor(GBufReq, Index, IndexShift, Size, + IsKernelCreatedFromSource, GlobalSize, impl->MArgs, + IsESIMD); ++IndexShift; detail::AccessorBaseHost *GOffsetBase = static_cast(&S->GlobalOffset); detail::AccessorImplPtr GOfssetImpl = detail::getSyclObjImpl(*GOffsetBase); detail::Requirement *GOffsetReq = GOfssetImpl.get(); - addArgsForGlobalAccessor( - GOffsetReq, Index, IndexShift, Size, IsKernelCreatedFromSource, - impl->MNDRDesc.GlobalSize.size(), impl->MArgs, IsESIMD); + addArgsForGlobalAccessor(GOffsetReq, Index, IndexShift, Size, + IsKernelCreatedFromSource, GlobalSize, impl->MArgs, + IsESIMD); ++IndexShift; detail::AccessorBaseHost *GFlushBase = static_cast(&S->GlobalFlushBuf); detail::AccessorImplPtr GFlushImpl = detail::getSyclObjImpl(*GFlushBase); detail::Requirement *GFlushReq = GFlushImpl.get(); - size_t GlobalSize = impl->MNDRDesc.GlobalSize.size(); // If work group size wasn't set explicitly then it must be recieved // from kernel attribute or set to default values. // For now we can't get this attribute here. // So we just suppose that WG size is always default for stream. // TODO adjust MNDRDesc when device image contains kernel's attribute if (GlobalSize == 0) { - // Suppose that work group size is 1 for every dimension - GlobalSize = impl->MNDRDesc.NumWorkGroups.size(); + GlobalSize = impl->MNDRDesc.NumWorkGroups[0]; + for (int I = 1; I < impl->MNDRDesc.Dims; ++I) { + GlobalSize *= impl->MNDRDesc.NumWorkGroups[I]; + } } addArgsForGlobalAccessor(GFlushReq, Index, IndexShift, Size, IsKernelCreatedFromSource, GlobalSize, impl->MArgs, @@ -1087,9 +1092,9 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, case access::target::device: case access::target::constant_buffer: { detail::Requirement *AccImpl = static_cast(Ptr); - addArgsForGlobalAccessor( - AccImpl, Index, IndexShift, Size, IsKernelCreatedFromSource, - impl->MNDRDesc.GlobalSize.size(), impl->MArgs, IsESIMD); + addArgsForGlobalAccessor(AccImpl, Index, IndexShift, Size, + IsKernelCreatedFromSource, GlobalSize, + impl->MArgs, IsESIMD); break; } case access::target::local: { From 4fe9507fd9b55b0aef49bcd63c674c7d65668324 Mon Sep 17 00:00:00 2001 From: Duncan Brawley Date: Thu, 12 Jun 2025 16:06:35 +0100 Subject: [PATCH 07/17] swap int with size_t --- sycl/source/handler.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 256c024190c94..e5c6934835630 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -1028,7 +1028,7 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, bool IsKernelCreatedFromSource, bool IsESIMD) { using detail::kernel_param_kind_t; size_t GlobalSize = impl->MNDRDesc.GlobalSize[0]; - for (int I = 1; I < impl->MNDRDesc.Dims; ++I) { + for (size_t I = 1; I < impl->MNDRDesc.Dims; ++I) { GlobalSize *= impl->MNDRDesc.GlobalSize[I]; } @@ -1070,7 +1070,7 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, // TODO adjust MNDRDesc when device image contains kernel's attribute if (GlobalSize == 0) { GlobalSize = impl->MNDRDesc.NumWorkGroups[0]; - for (int I = 1; I < impl->MNDRDesc.Dims; ++I) { + for (size_t I = 1; I < impl->MNDRDesc.Dims; ++I) { GlobalSize *= impl->MNDRDesc.NumWorkGroups[I]; } } From 11fdc8901766e2283e17fb77977ea5c0f6ddbf27 Mon Sep 17 00:00:00 2001 From: Duncan Brawley Date: Thu, 12 Jun 2025 17:21:51 +0100 Subject: [PATCH 08/17] Set GlobalRange default value to 1 --- sycl/source/detail/cg.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/cg.hpp b/sycl/source/detail/cg.hpp index 5c72dd0756a10..eea07f31b558b 100644 --- a/sycl/source/detail/cg.hpp +++ b/sycl/source/detail/cg.hpp @@ -129,7 +129,7 @@ class NDRDescT { NDRDescT &operator=(const NDRDescT &Desc) = default; NDRDescT &operator=(NDRDescT &&Desc) = default; - std::array GlobalSize{0, 0, 0}; + std::array GlobalSize{1, 1, 1}; std::array LocalSize{0, 0, 0}; std::array GlobalOffset{0, 0, 0}; /// Number of workgroups, used to record the number of workgroups from the From 19e8982bea8dfb7c88b6b9dd7c6d74c9a2925bc5 Mon Sep 17 00:00:00 2001 From: Duncan Brawley Date: Fri, 13 Jun 2025 11:44:57 +0100 Subject: [PATCH 09/17] Preserve previous behaviour to get HierPar/hier_par_basic.cpp to pass by setting extra dimension values to zero when using spercific constructor --- sycl/source/detail/cg.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sycl/source/detail/cg.hpp b/sycl/source/detail/cg.hpp index eea07f31b558b..3ef15e28e6b71 100644 --- a/sycl/source/detail/cg.hpp +++ b/sycl/source/detail/cg.hpp @@ -104,6 +104,10 @@ class NDRDescT { GlobalSize[I] = NumWorkItems[I]; GlobalOffset[I] = Offset[I]; } + + for (int I = Dims_; I < 3; ++I) { + GlobalSize[I] = 0; + } } template From 73b8e4d6372089cf066a07d0e2f6d28211b5522a Mon Sep 17 00:00:00 2001 From: Duncan Brawley Date: Fri, 13 Jun 2025 18:47:15 +0100 Subject: [PATCH 10/17] Preserve old behaviour of GlobalSize being set to zero when default constructor is used in NDRDescT --- sycl/source/detail/cg.hpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/sycl/source/detail/cg.hpp b/sycl/source/detail/cg.hpp index 3ef15e28e6b71..f74701f938929 100644 --- a/sycl/source/detail/cg.hpp +++ b/sycl/source/detail/cg.hpp @@ -80,6 +80,9 @@ class NDRDescT { GlobalSize[I] = N[I]; } } + for (int I = Dims_; I < 3; ++I) { + GlobalSize[I] = 1; + } } template @@ -95,6 +98,10 @@ class NDRDescT { for (int I = Dims_; I < 3; ++I) { LocalSize[I] = LocalSizes[0] ? 1 : 0; } + + for (int I = Dims_; I < 3; ++I) { + GlobalSize[I] = 1; + } } template @@ -105,9 +112,9 @@ class NDRDescT { GlobalOffset[I] = Offset[I]; } - for (int I = Dims_; I < 3; ++I) { - GlobalSize[I] = 0; - } + //for (int I = Dims_; I < 3; ++I) { + // GlobalSize[I] = 0; + //} } template @@ -133,7 +140,7 @@ class NDRDescT { NDRDescT &operator=(const NDRDescT &Desc) = default; NDRDescT &operator=(NDRDescT &&Desc) = default; - std::array GlobalSize{1, 1, 1}; + std::array GlobalSize{0, 0, 0}; std::array LocalSize{0, 0, 0}; std::array GlobalOffset{0, 0, 0}; /// Number of workgroups, used to record the number of workgroups from the From 889b4d782b598657b75cfd8d9cbdab549789fedd Mon Sep 17 00:00:00 2001 From: Duncan Brawley Date: Fri, 13 Jun 2025 18:54:46 +0100 Subject: [PATCH 11/17] Remove commented out code --- sycl/source/detail/cg.hpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sycl/source/detail/cg.hpp b/sycl/source/detail/cg.hpp index f74701f938929..37d1fadf0c091 100644 --- a/sycl/source/detail/cg.hpp +++ b/sycl/source/detail/cg.hpp @@ -111,10 +111,6 @@ class NDRDescT { GlobalSize[I] = NumWorkItems[I]; GlobalOffset[I] = Offset[I]; } - - //for (int I = Dims_; I < 3; ++I) { - // GlobalSize[I] = 0; - //} } template From 9964dcf75a38e6f37845a4821bae22f9d1d34425 Mon Sep 17 00:00:00 2001 From: Duncan Brawley Date: Mon, 16 Jun 2025 16:52:34 +0100 Subject: [PATCH 12/17] remove setting extra global size dims to 1 when using SetNumWorkGroups constructor --- sycl/source/detail/cg.hpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/sycl/source/detail/cg.hpp b/sycl/source/detail/cg.hpp index 37d1fadf0c091..14014bc6d1e1f 100644 --- a/sycl/source/detail/cg.hpp +++ b/sycl/source/detail/cg.hpp @@ -80,9 +80,6 @@ class NDRDescT { GlobalSize[I] = N[I]; } } - for (int I = Dims_; I < 3; ++I) { - GlobalSize[I] = 1; - } } template From 6c51413a9c9158a3ea4341cd9c2914c7b945c315 Mon Sep 17 00:00:00 2001 From: Duncan Brawley Date: Mon, 16 Jun 2025 17:53:39 +0100 Subject: [PATCH 13/17] Reintroduce setting extra global size dims to 1 only when SetNumWorkGroups is false --- sycl/source/detail/cg.hpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/sycl/source/detail/cg.hpp b/sycl/source/detail/cg.hpp index 14014bc6d1e1f..08d44efba30ec 100644 --- a/sycl/source/detail/cg.hpp +++ b/sycl/source/detail/cg.hpp @@ -62,6 +62,11 @@ class ArgDesc { // The structure represents NDRange - global, local sizes, global offset and // number of dimensions. + +// TODO: A lot of tests rely on particular dimension values to be set for +// dimensions that are higher than the number of dimensions actually being used +// as passed via `Dims_`. In addition, `GlobalSize` being zero is used as +// indication that `NumWorkGroup` should be used. class NDRDescT { public: @@ -79,6 +84,10 @@ class NDRDescT { for (size_t I = 0; I < Dims_; ++I) { GlobalSize[I] = N[I]; } + + for (int I = Dims_; I < 3; ++I) { + GlobalSize[I] = 1; + } } } @@ -121,10 +130,8 @@ class NDRDescT { : NDRDescT(Range, /*SetNumWorkGroups=*/false) {} template void setClusterDimensions(sycl::range N) { - if (this->Dims != size_t(Dims_)) { - throw std::runtime_error( - "Dimensionality of cluster, global and local ranges must be same"); - } + assert(this->Dims == size_t(Dims_) && + "Dimensionality of cluster, global and local ranges must be same"); for (int I = 0; I < Dims_; ++I) ClusterDimensions[I] = N[I]; From 9e879fa76aa2cc5e4829fc7c133bd0876a23cc6a Mon Sep 17 00:00:00 2001 From: Duncan Brawley Date: Wed, 18 Jun 2025 14:50:08 +0100 Subject: [PATCH 14/17] Fix formatting --- sycl/source/detail/cg.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/cg.hpp b/sycl/source/detail/cg.hpp index dc5c23fdccd71..fe06b8accfa5a 100644 --- a/sycl/source/detail/cg.hpp +++ b/sycl/source/detail/cg.hpp @@ -84,7 +84,7 @@ class NDRDescT { for (size_t I = 0; I < Dims_; ++I) { GlobalSize[I] = N[I]; } - + for (int I = Dims_; I < 3; ++I) { GlobalSize[I] = 1; } From ace2ae28afd99d3dc709d9ebab87a07b0bee82e3 Mon Sep 17 00:00:00 2001 From: Duncan Brawley Date: Wed, 18 Jun 2025 15:25:40 +0100 Subject: [PATCH 15/17] Update TODO text --- sycl/source/detail/cg.hpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sycl/source/detail/cg.hpp b/sycl/source/detail/cg.hpp index fe06b8accfa5a..c2651b54cd1bc 100644 --- a/sycl/source/detail/cg.hpp +++ b/sycl/source/detail/cg.hpp @@ -63,10 +63,12 @@ class ArgDesc { // The structure represents NDRange - global, local sizes, global offset and // number of dimensions. -// TODO: A lot of tests rely on particular dimension values to be set for -// dimensions that are higher than the number of dimensions actually being used -// as passed via `Dims_`. In addition, `GlobalSize` being zero is used as -// indication that `NumWorkGroup` should be used. +// TODO: A lot of tests tely on particular values to be set for dimensions that +// are not used. To clarify, for example, if a 2D kernel is invoked, in +// NDRDescT, the value of index 2 in GlobalSize must be set to either 1 or 0 +// depending on which constructor is used for no clear reason. +// Instead, only sensible defaults should be used and tests should be updated +// to reflect this. class NDRDescT { public: From abbeed96de9bb93d01919a4a7d87777a36b34cdc Mon Sep 17 00:00:00 2001 From: Duncan Brawley Date: Wed, 18 Jun 2025 15:37:09 +0100 Subject: [PATCH 16/17] Fix spelling error in comment --- sycl/source/detail/cg.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/cg.hpp b/sycl/source/detail/cg.hpp index c2651b54cd1bc..7112da540aded 100644 --- a/sycl/source/detail/cg.hpp +++ b/sycl/source/detail/cg.hpp @@ -63,7 +63,7 @@ class ArgDesc { // The structure represents NDRange - global, local sizes, global offset and // number of dimensions. -// TODO: A lot of tests tely on particular values to be set for dimensions that +// TODO: A lot of tests rely on particular values to be set for dimensions that // are not used. To clarify, for example, if a 2D kernel is invoked, in // NDRDescT, the value of index 2 in GlobalSize must be set to either 1 or 0 // depending on which constructor is used for no clear reason. From b04e914d224f6e2249040e2a04b063b42f22d231 Mon Sep 17 00:00:00 2001 From: Duncan Brawley Date: Thu, 26 Jun 2025 16:32:08 +0100 Subject: [PATCH 17/17] Revert swapping throw within setClusterDimensions with assert --- sycl/source/detail/cg.hpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/cg.hpp b/sycl/source/detail/cg.hpp index 7112da540aded..ce0aed954733c 100644 --- a/sycl/source/detail/cg.hpp +++ b/sycl/source/detail/cg.hpp @@ -132,8 +132,10 @@ class NDRDescT { : NDRDescT(Range, /*SetNumWorkGroups=*/false) {} template void setClusterDimensions(sycl::range N) { - assert(this->Dims == size_t(Dims_) && - "Dimensionality of cluster, global and local ranges must be same"); + if (this->Dims != size_t(Dims_)) { + throw std::runtime_error( + "Dimensionality of cluster, global and local ranges must be same"); + } for (int I = 0; I < Dims_; ++I) ClusterDimensions[I] = N[I];