@@ -67,6 +67,10 @@ namespace csd = cl::sycl::detail;
67
67
template <typename T, int Dimensions, typename AllocatorT> class buffer ;
68
68
namespace detail {
69
69
70
+ /// This class is the default KernelName template parameter type for kernel
71
+ /// invocation APIs such as single_task.
72
+ class auto_name {};
73
+
70
74
class queue_impl ;
71
75
class stream_impl ;
72
76
template <typename RetType, typename Func, typename Arg>
@@ -88,6 +92,19 @@ decltype(member_ptr_helper(&F::operator())) argument_helper(F);
88
92
89
93
template <typename T>
90
94
using lambda_arg_type = decltype (argument_helper(std::declval<T>()));
95
+
96
+ /// Helper struct to get a kernel name type based on given \c Name and \c Type
97
+ /// types: if \c Name is undefined (is \c auto_name) then \c Type becomes
98
+ /// the \c Name.
99
+ template <typename Name, typename Type> struct get_kernel_name_t {
100
+ using name = Name;
101
+ };
102
+
103
+ /// Specialization for the case when \c Name is undefined.
104
+ template <typename Type> struct get_kernel_name_t <csd::auto_name, Type> {
105
+ using name = Type;
106
+ };
107
+
91
108
} // namespace detail
92
109
93
110
// Objects of the handler class collect information about command group, such as
@@ -590,83 +607,62 @@ class handler {
590
607
}
591
608
592
609
// single_task version with a kernel represented as a lambda.
593
- template <typename KernelName, typename KernelType>
610
+ template <typename KernelName = csd::auto_name , typename KernelType>
594
611
void single_task (KernelType KernelFunc) {
612
+ using NameT = typename csd::get_kernel_name_t <KernelName, KernelType>::name;
595
613
#ifdef __SYCL_DEVICE_ONLY__
596
- kernel_single_task<KernelName >(KernelFunc);
614
+ kernel_single_task<NameT >(KernelFunc);
597
615
#else
598
616
MNDRDesc.set (range<1 >{1 });
599
617
600
- StoreLambda<KernelName , KernelType, /* Dims*/ 0 , void >(KernelFunc);
618
+ StoreLambda<NameT , KernelType, /* Dims*/ 0 , void >(KernelFunc);
601
619
MCGType = detail::CG::KERNEL;
602
620
#endif
603
621
}
604
622
605
- // single_task version with a kernel represented as a functor. Simply redirect
606
- // to the lambda-based form of invocation, setting kernel name type to the
607
- // functor type.
608
- template <typename KernelFunctorType>
609
- void single_task (KernelFunctorType KernelFunctor) {
610
- single_task<KernelFunctorType, KernelFunctorType>(KernelFunctor);
611
- }
612
-
613
623
// parallel_for version with a kernel represented as a lambda + range that
614
624
// specifies global size only.
615
- template <typename KernelName, typename KernelType, int Dims>
625
+ template <typename KernelName = csd::auto_name , typename KernelType, int Dims>
616
626
void parallel_for (range<Dims> NumWorkItems, KernelType KernelFunc) {
627
+ using NameT = typename csd::get_kernel_name_t <KernelName, KernelType>::name;
617
628
#ifdef __SYCL_DEVICE_ONLY__
618
- kernel_parallel_for<KernelName , KernelType, Dims>(KernelFunc);
629
+ kernel_parallel_for<NameT , KernelType, Dims>(KernelFunc);
619
630
#else
620
631
MNDRDesc.set (std::move (NumWorkItems));
621
- StoreLambda<KernelName , KernelType, Dims>(std::move (KernelFunc));
632
+ StoreLambda<NameT , KernelType, Dims>(std::move (KernelFunc));
622
633
MCGType = detail::CG::KERNEL;
623
634
#endif
624
635
}
625
636
626
- // parallel_for version with a kernel represented as a functor + range that
627
- // specifies global size only. Simply redirect to the lambda-based form of
628
- // invocation, setting kernel name type to the functor type.
629
- template <typename KernelType, int Dims>
630
- void parallel_for (range<Dims> NumWorkItems, KernelType KernelFunc) {
631
- parallel_for<KernelType, KernelType, Dims>(NumWorkItems, KernelFunc);
632
- }
633
-
634
637
// parallel_for version with a kernel represented as a lambda + range and
635
638
// offset that specify global size and global offset correspondingly.
636
- template <typename KernelName, typename KernelType, int Dims>
639
+ template <typename KernelName = csd::auto_name , typename KernelType, int Dims>
637
640
void parallel_for (range<Dims> NumWorkItems, id<Dims> WorkItemOffset,
638
641
KernelType KernelFunc) {
642
+ using NameT = typename csd::get_kernel_name_t <KernelName, KernelType>::name;
639
643
#ifdef __SYCL_DEVICE_ONLY__
640
- kernel_parallel_for<KernelName , KernelType, Dims>(KernelFunc);
644
+ kernel_parallel_for<NameT , KernelType, Dims>(KernelFunc);
641
645
#else
642
646
MNDRDesc.set (std::move (NumWorkItems), std::move (WorkItemOffset));
643
- StoreLambda<KernelName , KernelType, Dims>(std::move (KernelFunc));
647
+ StoreLambda<NameT , KernelType, Dims>(std::move (KernelFunc));
644
648
MCGType = detail::CG::KERNEL;
645
649
#endif
646
650
}
647
651
648
652
// parallel_for version with a kernel represented as a lambda + nd_range that
649
653
// specifies global, local sizes and offset.
650
- template <typename KernelName, typename KernelType, int Dims>
654
+ template <typename KernelName = csd::auto_name , typename KernelType, int Dims>
651
655
void parallel_for (nd_range<Dims> ExecutionRange, KernelType KernelFunc) {
656
+ using NameT = typename csd::get_kernel_name_t <KernelName, KernelType>::name;
652
657
#ifdef __SYCL_DEVICE_ONLY__
653
- kernel_parallel_for<KernelName , KernelType, Dims>(KernelFunc);
658
+ kernel_parallel_for<NameT , KernelType, Dims>(KernelFunc);
654
659
#else
655
660
MNDRDesc.set (std::move (ExecutionRange));
656
- StoreLambda<KernelName , KernelType, Dims>(std::move (KernelFunc));
661
+ StoreLambda<NameT , KernelType, Dims>(std::move (KernelFunc));
657
662
MCGType = detail::CG::KERNEL;
658
663
#endif
659
664
}
660
665
661
- // parallel_for version with a kernel represented as a functor + nd_range that
662
- // specifies global, local sizes and offset. Simply redirect to the
663
- // lambda-based form of invocation, setting kernel name type to the functor
664
- // type.
665
- template <typename KernelType, int Dims>
666
- void parallel_for (nd_range<Dims> ExecutionRange, KernelType KernelFunc) {
667
- parallel_for<KernelType, KernelType, Dims>(ExecutionRange, KernelFunc);
668
- }
669
-
670
666
// template <typename KernelName, typename WorkgroupFunctionType, int
671
667
// dimensions>
672
668
// void parallel_for_work_group(range<dimensions> numWorkGroups,
@@ -732,111 +728,82 @@ class handler {
732
728
// single_task version which takes two "kernels". One is a lambda, which is
733
729
// used if the device the queue is bound to is the host device. The second is
734
730
// a sycl::kernel, which is used otherwise.
735
- template <typename KernelName, typename KernelType>
731
+ template <typename KernelName = csd::auto_name , typename KernelType>
736
732
void single_task (kernel SyclKernel, KernelType KernelFunc) {
733
+ using NameT = typename csd::get_kernel_name_t <KernelName, KernelType>::name;
737
734
#ifdef __SYCL_DEVICE_ONLY__
738
- kernel_single_task<KernelName >(KernelFunc);
735
+ kernel_single_task<NameT >(KernelFunc);
739
736
#else
740
737
MNDRDesc.set (range<1 >{1 });
741
738
MSyclKernel = detail::getSyclObjImpl (std::move (SyclKernel));
742
739
MCGType = detail::CG::KERNEL;
743
- if (!MIsHost && !lambdaAndKernelHaveEqualName<KernelName >())
740
+ if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT >())
744
741
extractArgsAndReqs ();
745
742
else
746
- StoreLambda<KernelName, KernelType, /* Dims*/ 0 , void >(
747
- std::move (KernelFunc));
743
+ StoreLambda<NameT, KernelType, /* Dims*/ 0 , void >(std::move (KernelFunc));
748
744
#endif
749
745
}
750
746
751
- // single_task version which takes two "kernels". One is a functor which is
752
- // used if device, queue is bound to, is host device. Second is a sycl::kernel
753
- // which is used otherwise. Simply redirect to the lambda-based form of
754
- // invocation, setting kernel name type to the functor type.
755
- template <typename KernelType>
756
- void single_task (kernel SyclKernel, KernelType KernelFunc) {
757
- single_task<KernelType, KernelType>(SyclKernel, KernelFunc);
758
- }
759
-
760
747
// parallel_for version which takes two "kernels". One is a lambda, which is
761
748
// used if the device the queue is bound to is the host device. The second is
762
749
// a sycl::kernel, used otherwise. The range argument specifies the global size.
763
- template <typename KernelName, typename KernelType, int Dims>
764
- void parallel_for (range<Dims> NumWorkItems, kernel SyclKernel ,
750
+ template <typename KernelName = csd::auto_name , typename KernelType, int Dims>
751
+ void parallel_for (kernel SyclKernel, range<Dims> NumWorkItems,
765
752
KernelType KernelFunc) {
753
+ using NameT = typename csd::get_kernel_name_t <KernelName, KernelType>::name;
766
754
#ifdef __SYCL_DEVICE_ONLY__
767
- kernel_parallel_for<KernelName , KernelType, Dims>(KernelFunc);
755
+ kernel_parallel_for<NameT , KernelType, Dims>(KernelFunc);
768
756
#else
769
757
MNDRDesc.set (std::move (NumWorkItems));
770
758
MSyclKernel = detail::getSyclObjImpl (std::move (SyclKernel));
771
759
MCGType = detail::CG::KERNEL;
772
- if (!MIsHost && !lambdaAndKernelHaveEqualName<KernelName >())
760
+ if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT >())
773
761
extractArgsAndReqs ();
774
762
else
775
- StoreLambda<KernelName , KernelType, Dims>(std::move (KernelFunc));
763
+ StoreLambda<NameT , KernelType, Dims>(std::move (KernelFunc));
776
764
#endif
777
765
}
778
766
779
- // parallel_for version which takes two "kernels". One is a functor which is
780
- // used if device, queue is bound to, is host device. Second is a sycl::kernel
781
- // which is used otherwise. range argument specifies global size. Simply
782
- // redirect to the lambda-based form of invocation, setting kernel name type
783
- // to the functor type.
784
- template <typename KernelType, int Dims>
785
- void parallel_for (range<Dims> NumWorkItems, kernel SyclKernel,
786
- KernelType KernelFunc) {
787
- parallel_for<KernelType, KernelType, Dims>(NumWorkItems, SyclKernel,
788
- KernelFunc);
789
- }
790
-
791
767
// parallel_for version which takes two "kernels". One is a lambda, which is
792
768
// used if the device the queue is bound to is the host device. The second is
793
769
// a sycl::kernel, used otherwise. range and id specify global size and offset.
794
- template <typename KernelName, typename KernelType, int Dims>
795
- void parallel_for (range<Dims> NumWorkItems, id<Dims> WorkItemOffset,
796
- kernel SyclKernel, KernelType KernelFunc) {
770
+ template <typename KernelName = csd::auto_name, typename KernelType, int Dims>
771
+ void parallel_for (kernel SyclKernel, range<Dims> NumWorkItems,
772
+ id<Dims> WorkItemOffset, KernelType KernelFunc) {
773
+ using NameT = typename csd::get_kernel_name_t <KernelName, KernelType>::name;
797
774
#ifdef __SYCL_DEVICE_ONLY__
798
- kernel_parallel_for<KernelName , KernelType, Dims>(KernelFunc);
775
+ kernel_parallel_for<NameT , KernelType, Dims>(KernelFunc);
799
776
#else
800
777
MNDRDesc.set (std::move (NumWorkItems), std::move (WorkItemOffset));
801
778
MSyclKernel = detail::getSyclObjImpl (std::move (SyclKernel));
802
779
MCGType = detail::CG::KERNEL;
803
- if (!MIsHost && !lambdaAndKernelHaveEqualName<KernelName >())
780
+ if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT >())
804
781
extractArgsAndReqs ();
805
782
else
806
- StoreLambda<KernelName , KernelType, Dims>(std::move (KernelFunc));
783
+ StoreLambda<NameT , KernelType, Dims>(std::move (KernelFunc));
807
784
#endif
808
785
}
809
786
810
787
// parallel_for version which takes two "kernels". One is a lambda, which is
811
788
// used if the device the queue is bound to is the host device. The second is
812
789
// a sycl::kernel, used otherwise. nd_range gives global, local size and offset.
813
- template <typename KernelName, typename KernelType, int Dims>
814
- void parallel_for (nd_range<Dims> NDRange, kernel SyclKernel ,
790
+ template <typename KernelName = csd::auto_name , typename KernelType, int Dims>
791
+ void parallel_for (kernel SyclKernel, nd_range<Dims> NDRange,
815
792
KernelType KernelFunc) {
793
+ using NameT = typename csd::get_kernel_name_t <KernelName, KernelType>::name;
816
794
#ifdef __SYCL_DEVICE_ONLY__
817
- kernel_parallel_for<KernelName , KernelType, Dims>(KernelFunc);
795
+ kernel_parallel_for<NameT , KernelType, Dims>(KernelFunc);
818
796
#else
819
797
MNDRDesc.set (std::move (NDRange));
820
798
MSyclKernel = detail::getSyclObjImpl (std::move (SyclKernel));
821
799
MCGType = detail::CG::KERNEL;
822
- if (!MIsHost && !lambdaAndKernelHaveEqualName<KernelName >())
800
+ if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT >())
823
801
extractArgsAndReqs ();
824
802
else
825
- StoreLambda<KernelName , KernelType, Dims>(std::move (KernelFunc));
803
+ StoreLambda<NameT , KernelType, Dims>(std::move (KernelFunc));
826
804
#endif
827
805
}
828
806
829
- // parallel_for version which takes two "kernels". One is a functor which is
830
- // used if device, queue is bound to, is host device. Second is a sycl::kernel
831
- // which is used otherwise. nd_range specifies global, local size and offset.
832
- // Simply redirects to the lambda-based form of invocation, setting kernel
833
- // name type to the functor type.
834
- template <typename KernelType, int Dims>
835
- void parallel_for (nd_range<Dims> NDRange, kernel SyclKernel,
836
- KernelType KernelFunc) {
837
- parallel_for<KernelType, KernelType, Dims>(NDRange, SyclKernel, KernelFunc);
838
- }
839
-
840
807
// template <typename KernelName, typename WorkgroupFunctionType, int
841
808
// dimensions>
842
809
// void parallel_for_work_group(range<dimensions> num_work_groups, kernel
0 commit comments