From b0cf570e55a3f917c26e6f60630233a5f5c1b0f6 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Wed, 29 Sep 2021 19:02:24 +0300 Subject: [PATCH] [SYCL] Submission with kernel parameter ignores set kernel bundle The SYCL 2020 specification states that `single_task` and `parallel_for` with a `kernel` parameter should ignore a previously set kernel bundle and use the kernel bundle that contains the kernel. These changes make the functions overwrite the set kernel bundle. Signed-off-by: Steffen Larsen --- sycl/include/CL/sycl/handler.hpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 9337586914a29..e2ee965e70762 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -1788,6 +1788,8 @@ class __SYCL_EXPORT handler { void single_task(kernel Kernel) { throwIfActionIsCreated(); verifyKernelInvoc(Kernel); + // Ignore any set kernel bundles and use the one associated with the kernel + setHandlerKernelBundle(detail::getSyclObjImpl(Kernel.get_kernel_bundle())); // No need to check if range is out of INT_MAX limits as it's compile-time // known constant MNDRDesc.set(range<1>{1}); @@ -1859,6 +1861,8 @@ class __SYCL_EXPORT handler { template void single_task(kernel Kernel, _KERNELFUNCPARAM(KernelFunc)) { throwIfActionIsCreated(); + // Ignore any set kernel bundles and use the one associated with the kernel + setHandlerKernelBundle(detail::getSyclObjImpl(Kernel.get_kernel_bundle())); using NameT = typename detail::get_kernel_name_t::name; (void)Kernel; @@ -1902,6 +1906,8 @@ class __SYCL_EXPORT handler { void parallel_for(kernel Kernel, range NumWorkItems, _KERNELFUNCPARAM(KernelFunc)) { throwIfActionIsCreated(); + // Ignore any set kernel bundles and use the one associated with the kernel + setHandlerKernelBundle(detail::getSyclObjImpl(Kernel.get_kernel_bundle())); using NameT = typename detail::get_kernel_name_t::name; using LambdaArgType = sycl::detail::lambda_arg_type>; @@ -1937,6 +1943,8 @@ class __SYCL_EXPORT handler { void parallel_for(kernel Kernel, range NumWorkItems, id WorkItemOffset, _KERNELFUNCPARAM(KernelFunc)) { throwIfActionIsCreated(); + // Ignore any set kernel bundles and use the one associated with the kernel + setHandlerKernelBundle(detail::getSyclObjImpl(Kernel.get_kernel_bundle())); using NameT = typename detail::get_kernel_name_t::name; using LambdaArgType = sycl::detail::lambda_arg_type>; @@ -1972,6 +1980,8 @@ class __SYCL_EXPORT handler { void parallel_for(kernel Kernel, nd_range NDRange, _KERNELFUNCPARAM(KernelFunc)) { throwIfActionIsCreated(); + // Ignore any set kernel bundles and use the one associated with the kernel + setHandlerKernelBundle(detail::getSyclObjImpl(Kernel.get_kernel_bundle())); using NameT = typename detail::get_kernel_name_t::name; using LambdaArgType = @@ -2011,6 +2021,8 @@ class __SYCL_EXPORT handler { void parallel_for_work_group(kernel Kernel, range NumWorkGroups, _KERNELFUNCPARAM(KernelFunc)) { throwIfActionIsCreated(); + // Ignore any set kernel bundles and use the one associated with the kernel + setHandlerKernelBundle(detail::getSyclObjImpl(Kernel.get_kernel_bundle())); using NameT = typename detail::get_kernel_name_t::name; using LambdaArgType = @@ -2048,6 +2060,8 @@ class __SYCL_EXPORT handler { range WorkGroupSize, _KERNELFUNCPARAM(KernelFunc)) { throwIfActionIsCreated(); + // Ignore any set kernel bundles and use the one associated with the kernel + setHandlerKernelBundle(detail::getSyclObjImpl(Kernel.get_kernel_bundle())); using NameT = typename detail::get_kernel_name_t::name; using LambdaArgType =