Skip to content

Commit 13a7ee5

Browse files
committed
[SYCL] Fix functor versions of cl::sycl::handler kernel invocation APIs.
Remove the "template overloads" of the single_task and parallel_for invocation APIs, which led to template instantiation ambiguity in some cases and caused compilation errors. The APIs with KernelName and KernelType template parameters are changed to infer the correct KernelName in both the lambda and functor flavors of API invocation. Signed-off-by: Konstantin S Bobrovsky <[email protected]>
1 parent 7960931 commit 13a7ee5

File tree

2 files changed

+59
-92
lines changed

2 files changed

+59
-92
lines changed

sycl/include/CL/sycl/handler.hpp

Lines changed: 57 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,10 @@ namespace csd = cl::sycl::detail;
6767
template <typename T, int Dimensions, typename AllocatorT> class buffer;
6868
namespace detail {
6969

70+
/// This class is the default KernelName template parameter type for kernel
71+
/// invocation APIs such as single_task.
72+
class auto_name {};
73+
7074
class queue_impl;
7175
class stream_impl;
7276
template <typename RetType, typename Func, typename Arg>
@@ -88,6 +92,19 @@ decltype(member_ptr_helper(&F::operator())) argument_helper(F);
8892

8993
template <typename T>
9094
using lambda_arg_type = decltype(argument_helper(std::declval<T>()));
95+
96+
/// Helper struct to get a kernel name type based on given \c Name and \c Type
97+
/// types: if \c Name is undefined (is an \c auto_name) then \c Type becomes
98+
/// the \c Name.
99+
template <typename Name, typename Type> struct get_kernel_name_t {
100+
using name = Name;
101+
};
102+
103+
/// Specialization for the case when \c Name is undefined.
104+
template <typename Type> struct get_kernel_name_t<csd::auto_name, Type> {
105+
using name = Type;
106+
};
107+
91108
} // namespace detail
92109

93110
// Objects of the handler class collect information about command group, such as
@@ -590,83 +607,62 @@ class handler {
590607
}
591608

592609
// single_task version with a kernel represented as a lambda.
593-
template <typename KernelName, typename KernelType>
610+
template <typename KernelName = csd::auto_name, typename KernelType>
594611
void single_task(KernelType KernelFunc) {
612+
using NameT = typename csd::get_kernel_name_t<KernelName, KernelType>::name;
595613
#ifdef __SYCL_DEVICE_ONLY__
596-
kernel_single_task<KernelName>(KernelFunc);
614+
kernel_single_task<NameT>(KernelFunc);
597615
#else
598616
MNDRDesc.set(range<1>{1});
599617

600-
StoreLambda<KernelName, KernelType, /*Dims*/ 0, void>(KernelFunc);
618+
StoreLambda<NameT, KernelType, /*Dims*/ 0, void>(KernelFunc);
601619
MCGType = detail::CG::KERNEL;
602620
#endif
603621
}
604622

605-
// single_task version with a kernel represented as a functor. Simply redirect
606-
// to the lambda-based form of invocation, setting kernel name type to the
607-
// functor type.
608-
template <typename KernelFunctorType>
609-
void single_task(KernelFunctorType KernelFunctor) {
610-
single_task<KernelFunctorType, KernelFunctorType>(KernelFunctor);
611-
}
612-
613623
// parallel_for version with a kernel represented as a lambda + range that
614624
// specifies global size only.
615-
template <typename KernelName, typename KernelType, int Dims>
625+
template <typename KernelName = csd::auto_name, typename KernelType, int Dims>
616626
void parallel_for(range<Dims> NumWorkItems, KernelType KernelFunc) {
627+
using NameT = typename csd::get_kernel_name_t<KernelName, KernelType>::name;
617628
#ifdef __SYCL_DEVICE_ONLY__
618-
kernel_parallel_for<KernelName, KernelType, Dims>(KernelFunc);
629+
kernel_parallel_for<NameT, KernelType, Dims>(KernelFunc);
619630
#else
620631
MNDRDesc.set(std::move(NumWorkItems));
621-
StoreLambda<KernelName, KernelType, Dims>(std::move(KernelFunc));
632+
StoreLambda<NameT, KernelType, Dims>(std::move(KernelFunc));
622633
MCGType = detail::CG::KERNEL;
623634
#endif
624635
}
625636

626-
// parallel_for version with a kernel represented as a functor + range that
627-
// specifies global size only. Simply redirect to the lambda-based form of
628-
// invocation, setting kernel name type to the functor type.
629-
template <typename KernelType, int Dims>
630-
void parallel_for(range<Dims> NumWorkItems, KernelType KernelFunc) {
631-
parallel_for<KernelType, KernelType, Dims>(NumWorkItems, KernelFunc);
632-
}
633-
634637
// parallel_for version with a kernel represented as a lambda + range and
635638
// offset that specify global size and global offset correspondingly.
636-
template <typename KernelName, typename KernelType, int Dims>
639+
template <typename KernelName = csd::auto_name, typename KernelType, int Dims>
637640
void parallel_for(range<Dims> NumWorkItems, id<Dims> WorkItemOffset,
638641
KernelType KernelFunc) {
642+
using NameT = typename csd::get_kernel_name_t<KernelName, KernelType>::name;
639643
#ifdef __SYCL_DEVICE_ONLY__
640-
kernel_parallel_for<KernelName, KernelType, Dims>(KernelFunc);
644+
kernel_parallel_for<NameT, KernelType, Dims>(KernelFunc);
641645
#else
642646
MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset));
643-
StoreLambda<KernelName, KernelType, Dims>(std::move(KernelFunc));
647+
StoreLambda<NameT, KernelType, Dims>(std::move(KernelFunc));
644648
MCGType = detail::CG::KERNEL;
645649
#endif
646650
}
647651

648652
// parallel_for version with a kernel represented as a lambda + nd_range that
649653
// specifies global, local sizes and offset.
650-
template <typename KernelName, typename KernelType, int Dims>
654+
template <typename KernelName = csd::auto_name, typename KernelType, int Dims>
651655
void parallel_for(nd_range<Dims> ExecutionRange, KernelType KernelFunc) {
656+
using NameT = typename csd::get_kernel_name_t<KernelName, KernelType>::name;
652657
#ifdef __SYCL_DEVICE_ONLY__
653-
kernel_parallel_for<KernelName, KernelType, Dims>(KernelFunc);
658+
kernel_parallel_for<NameT, KernelType, Dims>(KernelFunc);
654659
#else
655660
MNDRDesc.set(std::move(ExecutionRange));
656-
StoreLambda<KernelName, KernelType, Dims>(std::move(KernelFunc));
661+
StoreLambda<NameT, KernelType, Dims>(std::move(KernelFunc));
657662
MCGType = detail::CG::KERNEL;
658663
#endif
659664
}
660665

661-
// parallel_for version with a kernel represented as a functor + nd_range that
662-
// specifies global, local sizes and offset. Simply redirect to the
663-
// lambda-based form of invocation, setting kernel name type to the functor
664-
// type.
665-
template <typename KernelType, int Dims>
666-
void parallel_for(nd_range<Dims> ExecutionRange, KernelType KernelFunc) {
667-
parallel_for<KernelType, KernelType, Dims>(ExecutionRange, KernelFunc);
668-
}
669-
670666
// template <typename KernelName, typename WorkgroupFunctionType, int
671667
// dimensions>
672668
// void parallel_for_work_group(range<dimensions> numWorkGroups,
@@ -732,111 +728,82 @@ class handler {
732728
// single_task version which takes two "kernels". One is a lambda which is
733729
// used if device, queue is bound to, is host device. Second is a sycl::kernel
734730
// which is used otherwise.
735-
template <typename KernelName, typename KernelType>
731+
template <typename KernelName = csd::auto_name, typename KernelType>
736732
void single_task(kernel SyclKernel, KernelType KernelFunc) {
733+
using NameT = typename csd::get_kernel_name_t<KernelName, KernelType>::name;
737734
#ifdef __SYCL_DEVICE_ONLY__
738-
kernel_single_task<KernelName>(KernelFunc);
735+
kernel_single_task<NameT>(KernelFunc);
739736
#else
740737
MNDRDesc.set(range<1>{1});
741738
MSyclKernel = detail::getSyclObjImpl(std::move(SyclKernel));
742739
MCGType = detail::CG::KERNEL;
743-
if (!MIsHost && !lambdaAndKernelHaveEqualName<KernelName>())
740+
if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>())
744741
extractArgsAndReqs();
745742
else
746-
StoreLambda<KernelName, KernelType, /*Dims*/ 0, void>(
747-
std::move(KernelFunc));
743+
StoreLambda<NameT, KernelType, /*Dims*/ 0, void>(std::move(KernelFunc));
748744
#endif
749745
}
750746

751-
// single_task version which takes two "kernels". One is a functor which is
752-
// used if device, queue is bound to, is host device. Second is a sycl::kernel
753-
// which is used otherwise. Simply redirect to the lambda-based form of
754-
// invocation, setting kernel name type to the functor type.
755-
template <typename KernelType>
756-
void single_task(kernel SyclKernel, KernelType KernelFunc) {
757-
single_task<KernelType, KernelType>(SyclKernel, KernelFunc);
758-
}
759-
760747
// parallel_for version which takes two "kernels". One is a lambda which is
761748
// used if device, queue is bound to, is host device. Second is a sycl::kernel
762749
// which is used otherwise. range argument specifies global size.
763-
template <typename KernelName, typename KernelType, int Dims>
764-
void parallel_for(range<Dims> NumWorkItems, kernel SyclKernel,
750+
template <typename KernelName = csd::auto_name, typename KernelType, int Dims>
751+
void parallel_for(kernel SyclKernel, range<Dims> NumWorkItems,
765752
KernelType KernelFunc) {
753+
using NameT = typename csd::get_kernel_name_t<KernelName, KernelType>::name;
766754
#ifdef __SYCL_DEVICE_ONLY__
767-
kernel_parallel_for<KernelName, KernelType, Dims>(KernelFunc);
755+
kernel_parallel_for<NameT, KernelType, Dims>(KernelFunc);
768756
#else
769757
MNDRDesc.set(std::move(NumWorkItems));
770758
MSyclKernel = detail::getSyclObjImpl(std::move(SyclKernel));
771759
MCGType = detail::CG::KERNEL;
772-
if (!MIsHost && !lambdaAndKernelHaveEqualName<KernelName>())
760+
if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>())
773761
extractArgsAndReqs();
774762
else
775-
StoreLambda<KernelName, KernelType, Dims>(std::move(KernelFunc));
763+
StoreLambda<NameT, KernelType, Dims>(std::move(KernelFunc));
776764
#endif
777765
}
778766

779-
// parallel_for version which takes two "kernels". One is a functor which is
780-
// used if device, queue is bound to, is host device. Second is a sycl::kernel
781-
// which is used otherwise. range argument specifies global size. Simply
782-
// redirect to the lambda-based form of invocation, setting kernel name type
783-
// to the functor type.
784-
template <typename KernelType, int Dims>
785-
void parallel_for(range<Dims> NumWorkItems, kernel SyclKernel,
786-
KernelType KernelFunc) {
787-
parallel_for<KernelType, KernelType, Dims>(NumWorkItems, SyclKernel,
788-
KernelFunc);
789-
}
790-
791767
// parallel_for version which takes two "kernels". One is a lambda which is
792768
// used if device, queue is bound to, is host device. Second is a sycl::kernel
793769
// which is used otherwise. range and id specify global size and offset.
794-
template <typename KernelName, typename KernelType, int Dims>
795-
void parallel_for(range<Dims> NumWorkItems, id<Dims> WorkItemOffset,
796-
kernel SyclKernel, KernelType KernelFunc) {
770+
template <typename KernelName = csd::auto_name, typename KernelType, int Dims>
771+
void parallel_for(kernel SyclKernel, range<Dims> NumWorkItems,
772+
id<Dims> WorkItemOffset, KernelType KernelFunc) {
773+
using NameT = typename csd::get_kernel_name_t<KernelName, KernelType>::name;
797774
#ifdef __SYCL_DEVICE_ONLY__
798-
kernel_parallel_for<KernelName, KernelType, Dims>(KernelFunc);
775+
kernel_parallel_for<NameT, KernelType, Dims>(KernelFunc);
799776
#else
800777
MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset));
801778
MSyclKernel = detail::getSyclObjImpl(std::move(SyclKernel));
802779
MCGType = detail::CG::KERNEL;
803-
if (!MIsHost && !lambdaAndKernelHaveEqualName<KernelName>())
780+
if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>())
804781
extractArgsAndReqs();
805782
else
806-
StoreLambda<KernelName, KernelType, Dims>(std::move(KernelFunc));
783+
StoreLambda<NameT, KernelType, Dims>(std::move(KernelFunc));
807784
#endif
808785
}
809786

810787
// parallel_for version which takes two "kernels". One is a lambda which is
811788
// used if device, queue is bound to, is host device. Second is a sycl::kernel
812789
// which is used otherwise. nd_range specifies global, local size and offset.
813-
template <typename KernelName, typename KernelType, int Dims>
814-
void parallel_for(nd_range<Dims> NDRange, kernel SyclKernel,
790+
template <typename KernelName = csd::auto_name, typename KernelType, int Dims>
791+
void parallel_for(kernel SyclKernel, nd_range<Dims> NDRange,
815792
KernelType KernelFunc) {
793+
using NameT = typename csd::get_kernel_name_t<KernelName, KernelType>::name;
816794
#ifdef __SYCL_DEVICE_ONLY__
817-
kernel_parallel_for<KernelName, KernelType, Dims>(KernelFunc);
795+
kernel_parallel_for<NameT, KernelType, Dims>(KernelFunc);
818796
#else
819797
MNDRDesc.set(std::move(NDRange));
820798
MSyclKernel = detail::getSyclObjImpl(std::move(SyclKernel));
821799
MCGType = detail::CG::KERNEL;
822-
if (!MIsHost && !lambdaAndKernelHaveEqualName<KernelName>())
800+
if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>())
823801
extractArgsAndReqs();
824802
else
825-
StoreLambda<KernelName, KernelType, Dims>(std::move(KernelFunc));
803+
StoreLambda<NameT, KernelType, Dims>(std::move(KernelFunc));
826804
#endif
827805
}
828806

829-
// parallel_for version which takes two "kernels". One is a functor which is
830-
// used if device, queue is bound to, is host device. Second is a sycl::kernel
831-
// which is used otherwise. nd_range specifies global, local size and offset.
832-
// Simply redirects to the lambda-based form of invocation, setting kernel
833-
// name type to the functor type.
834-
template <typename KernelType, int Dims>
835-
void parallel_for(nd_range<Dims> NDRange, kernel SyclKernel,
836-
KernelType KernelFunc) {
837-
parallel_for<KernelType, KernelType, Dims>(NDRange, SyclKernel, KernelFunc);
838-
}
839-
840807
// template <typename KernelName, typename WorkgroupFunctionType, int
841808
// dimensions>
842809
// void parallel_for_work_group(range<dimensions> num_work_groups, kernel

sycl/test/kernel-and-program/kernel-and-program.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ int main() {
149149
q.submit([&](cl::sycl::handler &cgh) {
150150
auto acc = buf.get_access<cl::sycl::access::mode::read_write>(cgh);
151151
cgh.parallel_for<class ParallelFor>(
152-
numOfItems, krn,
152+
krn, numOfItems,
153153
[=](cl::sycl::id<1> wiID) { acc[wiID] = acc[wiID] + 1; });
154154
});
155155
}
@@ -233,7 +233,7 @@ int main() {
233233
localAcc(localRange, cgh);
234234

235235
cgh.parallel_for<class ParallelForND>(
236-
cl::sycl::nd_range<1>(numOfItems, localRange), krn,
236+
krn, cl::sycl::nd_range<1>(numOfItems, localRange),
237237
[=](cl::sycl::nd_item<1> item) {
238238
size_t idx = item.get_global_linear_id();
239239
int pos = idx & 1;

0 commit comments

Comments
 (0)