diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 7045cfe670a6..b620b4929752 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -3716,6 +3716,9 @@ class __SYCL_EXPORT handler { void setKernelIsCooperative(bool); // Set using cuda thread block cluster launch flag and set the launch bounds. +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + void setKernelClusterLaunch(sycl::range<3> ClusterSize, int Dims); +#endif void setKernelClusterLaunch(sycl::range<3> ClusterSize); void setKernelClusterLaunch(sycl::range<2> ClusterSize); void setKernelClusterLaunch(sycl::range<1> ClusterSize); @@ -3819,6 +3822,16 @@ class __SYCL_EXPORT handler { bool HasAssociatedAccessor(detail::AccessorImplHost *Req, access::target AccessTarget) const; +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + void setNDRangeDescriptorPadded(sycl::range<3> N, bool SetNumWorkGroups, + int Dims); + void setNDRangeDescriptorPadded(sycl::range<3> NumWorkItems, + sycl::id<3> Offset, int Dims); + void setNDRangeDescriptorPadded(sycl::range<3> NumWorkItems, + sycl::range<3> LocalSize, sycl::id<3> Offset, + int Dims); +#endif + template void setNDRangeDescriptor(sycl::range N, bool SetNumWorkGroups = false) { diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 8724e829fe32..8459da7701f5 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -2243,6 +2243,25 @@ void handler::setKernelIsCooperative(bool KernelIsCooperative) { impl->MKernelIsCooperative = KernelIsCooperative; } +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES +void handler::setKernelClusterLaunch(sycl::range<3> ClusterSize, int Dims) { + throwIfGraphAssociated< + syclex::detail::UnsupportedGraphFeatures:: + sycl_ext_oneapi_experimental_cuda_cluster_launch>(); + impl->MKernelUsesClusterLaunch = true; + + if (Dims == 1) { + sycl::range<1> ClusterSizeTrimmed = {ClusterSize[0]}; + impl->MNDRDesc.setClusterDimensions(ClusterSizeTrimmed); + } else if (Dims == 2) { + sycl::range<2> ClusterSizeTrimmed = {ClusterSize[0], ClusterSize[1]}; + impl->MNDRDesc.setClusterDimensions(ClusterSizeTrimmed); + } else if (Dims == 3) { + impl->MNDRDesc.setClusterDimensions(ClusterSize); + } +} +#endif + void handler::setKernelClusterLaunch(sycl::range<3> ClusterSize) { throwIfGraphAssociated< syclex::detail::UnsupportedGraphFeatures:: @@ -2419,6 +2438,56 @@ bool handler::HasAssociatedAccessor(detail::AccessorImplHost *Req, void handler::setType(sycl::detail::CGType Type) { impl->MCGType = Type; } sycl::detail::CGType handler::getType() const { return impl->MCGType; } +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES +void handler::setNDRangeDescriptorPadded(sycl::range<3> N, + bool SetNumWorkGroups, int Dims) { + if (Dims == 1) { + sycl::range<1> Range = {N[0]}; + impl->MNDRDesc = NDRDescT{Range, SetNumWorkGroups}; + } else if (Dims == 2) { + sycl::range<2> Range = {N[0], N[1]}; + impl->MNDRDesc = NDRDescT{Range, SetNumWorkGroups}; + } else if (Dims == 3) { + impl->MNDRDesc = NDRDescT{N, SetNumWorkGroups}; + } +} + +void handler::setNDRangeDescriptorPadded(sycl::range<3> NumWorkItems, + sycl::id<3> Offset, int Dims) { + if (Dims == 1) { + sycl::range<1> NumWorkItemsTrimmed = {NumWorkItems[0]}; + sycl::id<1> OffsetTrimmed = {Offset[0]}; + impl->MNDRDesc = NDRDescT{NumWorkItemsTrimmed, OffsetTrimmed}; + } else if (Dims == 2) { + sycl::range<2> NumWorkItemsTrimmed = {NumWorkItems[0], NumWorkItems[1]}; + sycl::id<2> OffsetTrimmed = {Offset[0], Offset[1]}; + impl->MNDRDesc = NDRDescT{NumWorkItemsTrimmed, OffsetTrimmed}; + } else if (Dims == 3) { + impl->MNDRDesc = NDRDescT{NumWorkItems, Offset}; + } +} + +void handler::setNDRangeDescriptorPadded(sycl::range<3> NumWorkItems, + sycl::range<3> LocalSize, + sycl::id<3> Offset, int Dims) { + if (Dims == 1) { + sycl::range<1> NumWorkItemsTrimmed = {NumWorkItems[0]}; + sycl::range<1> LocalSizeTrimmed = {LocalSize[0]}; + sycl::id<1> OffsetTrimmed = {Offset[0]}; + impl->MNDRDesc = + NDRDescT{NumWorkItemsTrimmed, LocalSizeTrimmed, OffsetTrimmed}; + } else if (Dims == 2) { + sycl::range<2> NumWorkItemsTrimmed = {NumWorkItems[0], NumWorkItems[1]}; + sycl::range<2> LocalSizeTrimmed = {LocalSize[0], LocalSize[1]}; + sycl::id<2> OffsetTrimmed = {Offset[0], Offset[1]}; + impl->MNDRDesc = + NDRDescT{NumWorkItemsTrimmed, LocalSizeTrimmed, OffsetTrimmed}; + } else if (Dims == 3) { + impl->MNDRDesc = NDRDescT{NumWorkItems, LocalSize, Offset}; + } +} +#endif + void handler::setNDRangeDescriptor(sycl::range<3> N, bool SetNumWorkGroups) { impl->MNDRDesc = NDRDescT{N, SetNumWorkGroups}; } diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 962583e458ba..ffd07c6d6c17 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3590,6 +3590,7 @@ _ZN4sycl3_V17handler22setHandlerKernelBundleERKSt10shared_ptrINS0_6detail18kerne _ZN4sycl3_V17handler22setKernelClusterLaunchENS0_5rangeILi1EEE _ZN4sycl3_V17handler22setKernelClusterLaunchENS0_5rangeILi2EEE _ZN4sycl3_V17handler22setKernelClusterLaunchENS0_5rangeILi3EEE +_ZN4sycl3_V17handler22setKernelClusterLaunchENS0_5rangeILi3EEEi _ZN4sycl3_V17handler22setKernelIsCooperativeEb _ZN4sycl3_V17handler23instantiateKernelOnHostEPv _ZN4sycl3_V17handler24GetRangeRoundingSettingsERmS2_S2_ @@ -3601,6 +3602,9 @@ _ZN4sycl3_V17handler24registerDynamicParameterERNS0_3ext6oneapi12experimental6de _ZN4sycl3_V17handler25ext_intel_write_host_pipeENS0_6detail11string_viewEPvmb _ZN4sycl3_V17handler26associateWithHandlerCommonESt10shared_ptrINS0_6detail16AccessorImplHostEEi _ZN4sycl3_V17handler26setKernelNameBasedCachePtrEPNS0_6detail21KernelNameBasedCacheTE +_ZN4sycl3_V17handler26setNDRangeDescriptorPaddedENS0_5rangeILi3EEENS0_2idILi3EEEi +_ZN4sycl3_V17handler26setNDRangeDescriptorPaddedENS0_5rangeILi3EEES3_NS0_2idILi3EEEi +_ZN4sycl3_V17handler26setNDRangeDescriptorPaddedENS0_5rangeILi3EEEbi _ZN4sycl3_V17handler27addLifetimeSharedPtrStorageESt10shared_ptrIKvE _ZN4sycl3_V17handler27computeFallbackKernelBoundsEmm _ZN4sycl3_V17handler28extractArgsAndReqsFromLambdaEPcPFNS0_6detail19kernel_param_desc_tEiEmb diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index d5e0384e67ba..348081415626 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -4405,6 +4405,7 @@ ?setKernelClusterLaunch@handler@_V1@sycl@@AEAAXV?$range@$00@23@@Z ?setKernelClusterLaunch@handler@_V1@sycl@@AEAAXV?$range@$01@23@@Z ?setKernelClusterLaunch@handler@_V1@sycl@@AEAAXV?$range@$02@23@@Z +?setKernelClusterLaunch@handler@_V1@sycl@@AEAAXV?$range@$02@23@H@Z ?setKernelInfo@handler@_V1@sycl@@AEAAXPEAXHP6A?AUkernel_param_desc_t@detail@23@H@Z_N2@Z ?setKernelIsCooperative@handler@_V1@sycl@@AEAAX_N@Z ?setKernelNameBasedCachePtr@handler@_V1@sycl@@AEAAXPEAUKernelNameBasedCacheT@detail@23@@Z @@ -4419,6 +4420,9 @@ ?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$02@23@0V?$id@$02@23@@Z ?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$02@23@V?$id@$02@23@@Z ?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$02@23@_N@Z +?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@V?$id@$02@23@H@Z +?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@0V?$id@$02@23@H@Z +?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@_NH@Z ?setNDRangeUsed@handler@_V1@sycl@@AEAAX_N@Z ?setStateExplicitKernelBundle@handler@_V1@sycl@@AEAAXXZ ?setStateSpecConstSet@handler@_V1@sycl@@AEAAXXZ