From bafeffa7dee202c01c3e88100f573a1bacc51af1 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Tue, 25 Jan 2022 11:28:28 -0800 Subject: [PATCH 01/13] initial checkpoint --- sycl/include/CL/sycl/group_algorithm.hpp | 51 +++++++++++++++++++++++- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/sycl/include/CL/sycl/group_algorithm.hpp b/sycl/include/CL/sycl/group_algorithm.hpp index 6ede3ff23b68f..2925ba4e2ac94 100644 --- a/sycl/include/CL/sycl/group_algorithm.hpp +++ b/sycl/include/CL/sycl/group_algorithm.hpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #pragma once +#include + #include #include #include @@ -20,6 +22,8 @@ #include #include +#define SYCL_EXT_ONEAPI_COMPLEX_ALGORITHMS 1 + __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { @@ -97,6 +101,17 @@ template struct is_native_op { is_contained>::value; }; + +// CP +// ---- is_complex +template +struct is_complex : std::bool_constant> || std::is_same_v> || std::is_same_v>> {}; + +// ---- is_arithmetic_or_complex +template +using is_arithmetic_or_complex = + std::bool_constant::value || sycl::detail::is_arithmetic::value>; + // ---- for_each template Function for_each(Group g, Ptr first, Ptr last, Function f) { @@ -119,6 +134,9 @@ Function for_each(Group g, Ptr first, Ptr last, Function f) { } // namespace detail // ---- reduce_over_group +// three argument variant is specialized thrice: +// scalar arithmetic, complex (plus only), and vector arithmetic (sycl::vec) + template detail::enable_if_t<(is_group_v> && detail::is_scalar_arithmetic::value && @@ -141,6 +159,32 @@ reduce_over_group(Group, T x, BinaryOperation binary_op) { #endif } + + +// CANONICAL CALL: std::complex res = sycl::reduce_over_group(item.get_group(),val,std::plus>()); + +// complex specializaion. T is std::complex or similar. +// binary op is sycl::plus> +template +detail::enable_if_t<(is_group_v> && + detail::is_complex::value && + detail::is_native_op>::value), + T> +reduce_over_group(Group g, T x, sycl::plus binary_op) { +#ifdef __SYCL_DEVICE_ONLY__ + // return sycl::detail::calc::value>( + // typename sycl::detail::GroupOpTag::type(), x, binary_op); + T result; // + result.real(reduce_over_group(g, x.real(), sycl::plus<>())); + result.imag(reduce_over_group(g, x.imag(), sycl::plus<>())); + return result; +#else + throw runtime_error("Group algorithms are not supported on host device.", + PI_INVALID_DEVICE); +#endif +} + template detail::enable_if_t<(is_group_v> && detail::is_vector_arithmetic::value && @@ -161,10 +205,12 @@ reduce_over_group(Group g, T x, BinaryOperation binary_op) { return result; } +// four argument variant of reduce_over_group specialized twice +// (scalar arithmetic || complex), and vector_arithmetic template detail::enable_if_t<(is_group_v> && - detail::is_scalar_arithmetic::value && - detail::is_scalar_arithmetic::value && + (detail::is_scalar_arithmetic::value || detail::is_complex::value) && + (detail::is_scalar_arithmetic::value || detail::is_complex::value) && detail::is_native_op::value && detail::is_native_op::value), T> @@ -184,6 +230,7 @@ reduce_over_group(Group g, V x, T init, BinaryOperation binary_op) { #endif } + template detail::enable_if_t<(is_group_v> && detail::is_vector_arithmetic::value && From 4e9e3216d6ceb87ba8b57f3c558972dde1f581bf Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Tue, 25 Jan 2022 13:11:56 -0800 Subject: [PATCH 02/13] joint_reduce and reduce_over_group now support std::complex, limited to sycl::plus binary op Signed-off-by: Chris Perkins --- sycl/include/CL/sycl/group_algorithm.hpp | 70 ++++++++++++++++++------ 1 file changed, 52 insertions(+), 18 deletions(-) diff --git a/sycl/include/CL/sycl/group_algorithm.hpp b/sycl/include/CL/sycl/group_algorithm.hpp index 2925ba4e2ac94..a69010fdaa177 100644 --- a/sycl/include/CL/sycl/group_algorithm.hpp +++ b/sycl/include/CL/sycl/group_algorithm.hpp @@ -101,17 +101,10 @@ template struct is_native_op { is_contained>::value; }; - -// CP // ---- is_complex template struct is_complex : std::bool_constant> || std::is_same_v> || std::is_same_v>> {}; -// ---- is_arithmetic_or_complex -template -using is_arithmetic_or_complex = - std::bool_constant::value || sycl::detail::is_arithmetic::value>; - // ---- for_each template Function for_each(Group g, Ptr first, Ptr last, Function f) { @@ -260,24 +253,18 @@ reduce_over_group(Group g, V x, T init, BinaryOperation binary_op) { } // ---- joint_reduce +// specialized for is_arithmetic (both scalar and vector) and complex +// (limited to sycl::plus) template detail::enable_if_t< (is_group_v> && detail::is_pointer::value && detail::is_arithmetic::type>::value), typename detail::remove_pointer::type> joint_reduce(Group g, Ptr first, Ptr last, BinaryOperation binary_op) { - using T = typename detail::remove_pointer::type; - // FIXME: Do not special-case for half precision - static_assert( - std::is_same::value || - (std::is_same::value && - std::is_same::value), - "Result type of binary_op must match reduction accumulation type."); #ifdef __SYCL_DEVICE_ONLY__ - T partial = sycl::known_identity_v; - sycl::detail::for_each(g, first, last, - [&](const T &x) { partial = binary_op(partial, x); }); - return reduce_over_group(g, partial, binary_op); + using T = typename detail::remove_pointer::type; + T init = sycl::known_identity_v; + return joint_reduce(g, first, last, init, binary_op); #else (void)g; (void)last; @@ -318,6 +305,53 @@ joint_reduce(Group g, Ptr first, Ptr last, T init, BinaryOperation binary_op) { #endif } +// specializations of joint_reduce for complex, limited to sycl::plus operation. +// T will be std::complex or similar +template +detail::enable_if_t< + (is_group_v> && detail::is_pointer::value && + detail::is_complex::type>::value), + typename detail::remove_pointer::type> +joint_reduce(Group g, Ptr first, Ptr last, + sycl::plus::type> binary_op) { +#ifdef __SYCL_DEVICE_ONLY__ + using T = typename detail::remove_pointer::type; + T init{0, 0}; + return joint_reduce(g, first, last, init, binary_op); +#else + (void)g; + (void)last; + (void)binary_op; + throw runtime_error("Group algorithms are not supported on host device.", + PI_INVALID_DEVICE); +#endif +} + +template +detail::enable_if_t< + (is_group_v> && detail::is_pointer::value && + detail::is_complex::type>::value && + detail::is_complex::value && + detail::is_native_op::type, + sycl::plus>::value && + detail::is_native_op>::value), + T> +joint_reduce(Group g, Ptr first, Ptr last, T init, sycl::plus binary_op) { +#ifdef __SYCL_DEVICE_ONLY__ + T partial{0, 0}; + sycl::detail::for_each( + g, first, last, [&](const typename detail::remove_pointer::type &x) { + partial += x; + }); + return reduce_over_group(g, partial, init, binary_op); +#else + (void)g; + (void)last; + throw runtime_error("Group algorithms are not supported on host device.", + PI_INVALID_DEVICE); +#endif +} + // ---- any_of_group template detail::enable_if_t>, bool> From a7b034984c9e2d49c8863727534221a7c76c227f Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Tue, 25 Jan 2022 13:21:31 -0800 Subject: [PATCH 03/13] cleanup Signed-off-by: Chris Perkins --- sycl/include/CL/sycl/group_algorithm.hpp | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/sycl/include/CL/sycl/group_algorithm.hpp b/sycl/include/CL/sycl/group_algorithm.hpp index a69010fdaa177..dc5a214f3d330 100644 --- a/sycl/include/CL/sycl/group_algorithm.hpp +++ b/sycl/include/CL/sycl/group_algorithm.hpp @@ -152,11 +152,7 @@ reduce_over_group(Group, T x, BinaryOperation binary_op) { #endif } - - -// CANONICAL CALL: std::complex res = sycl::reduce_over_group(item.get_group(),val,std::plus>()); - -// complex specializaion. T is std::complex or similar. +// complex specialization. T is std::complex or similar. // binary op is sycl::plus> template detail::enable_if_t<(is_group_v> && @@ -165,10 +161,7 @@ detail::enable_if_t<(is_group_v> && T> reduce_over_group(Group g, T x, sycl::plus binary_op) { #ifdef __SYCL_DEVICE_ONLY__ - // return sycl::detail::calc::value>( - // typename sycl::detail::GroupOpTag::type(), x, binary_op); - T result; // + T result; result.real(reduce_over_group(g, x.real(), sycl::plus<>())); result.imag(reduce_over_group(g, x.imag(), sycl::plus<>())); return result; @@ -254,7 +247,6 @@ reduce_over_group(Group g, V x, T init, BinaryOperation binary_op) { // ---- joint_reduce // specialized for is_arithmetic (both scalar and vector) and complex -// (limited to sycl::plus) template detail::enable_if_t< (is_group_v> && detail::is_pointer::value && From 1790647b743b5d1827da052ecf69497cd0481544 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Tue, 25 Jan 2022 18:21:41 -0800 Subject: [PATCH 04/13] exclusiv_scan_over_group and joint_exclusive_scan Signed-off-by: Chris Perkins --- sycl/include/CL/sycl/group_algorithm.hpp | 106 +++++++++++++++++++++-- 1 file changed, 100 insertions(+), 6 deletions(-) diff --git a/sycl/include/CL/sycl/group_algorithm.hpp b/sycl/include/CL/sycl/group_algorithm.hpp index dc5a214f3d330..3da749447df90 100644 --- a/sycl/include/CL/sycl/group_algorithm.hpp +++ b/sycl/include/CL/sycl/group_algorithm.hpp @@ -589,6 +589,10 @@ group_broadcast(Group g, T x) { } // ---- exclusive_scan_over_group +// this function has two overloads, one with three arguments and one with four +// (init) +// the three argument version is specialized thrice: scalar, complex and +// vector template detail::enable_if_t<(is_group_v> && detail::is_scalar_arithmetic::value && @@ -610,6 +614,25 @@ exclusive_scan_over_group(Group, T x, BinaryOperation binary_op) { #endif } +// complex specialization. T is std::complex or similar. +// binary op is sycl::plus> +template +detail::enable_if_t<(is_group_v> && + detail::is_complex::value && + detail::is_native_op>::value), + T> +exclusive_scan_over_group(Group g, T x, sycl::plus binary_op) { +#ifdef __SYCL_DEVICE_ONLY__ + T result; + result.real(exclusive_scan_over_group(g, x.real(), sycl::plus<>())); + result.imag(exclusive_scan_over_group(g, x.imag(), sycl::plus<>())); + return result; +#else + throw runtime_error("Group algorithms are not supported on host device.", + PI_INVALID_DEVICE); +#endif +} + template detail::enable_if_t<(is_group_v> && detail::is_vector_arithmetic::value && @@ -630,6 +653,8 @@ exclusive_scan_over_group(Group g, T x, BinaryOperation binary_op) { return result; } +// four argument version of exclusive_scan_over_group is specialized twice +// once for vector_arithmetic, once for (scalar_arithmetic || complex) template detail::enable_if_t<(is_group_v> && detail::is_vector_arithmetic::value && @@ -653,12 +678,13 @@ exclusive_scan_over_group(Group g, V x, T init, BinaryOperation binary_op) { } template -detail::enable_if_t<(is_group_v> && - detail::is_scalar_arithmetic::value && - detail::is_scalar_arithmetic::value && - detail::is_native_op::value && - detail::is_native_op::value), - T> +detail::enable_if_t< + (is_group_v> && + (detail::is_scalar_arithmetic::value || detail::is_complex::value) && + (detail::is_scalar_arithmetic::value || detail::is_complex::value) && + detail::is_native_op::value && + detail::is_native_op::value), + T> exclusive_scan_over_group(Group g, V x, T init, BinaryOperation binary_op) { // FIXME: Do not special-case for half precision static_assert(std::is_same::value || @@ -684,6 +710,9 @@ exclusive_scan_over_group(Group g, V x, T init, BinaryOperation binary_op) { } // ---- joint_exclusive_scan +// has two overloads: 5 arguments and 6 arguments, +// each is specialized twice: is_arithmetic and is_complex. + template detail::enable_if_t< @@ -765,6 +794,71 @@ joint_exclusive_scan(Group g, InPtr first, InPtr last, OutPtr result, binary_op); } +// specialized for complex +template +detail::enable_if_t< + (is_group_v> && detail::is_pointer::value && + detail::is_pointer::value && + detail::is_complex::type>::value && + detail::is_complex::value && + detail::is_native_op< + typename detail::remove_pointer::type, + sycl::plus::type>>::value && + detail::is_native_op>::value), + OutPtr> +joint_exclusive_scan(Group g, InPtr first, InPtr last, OutPtr result, T init, + sycl::plus binary_op) { +#ifdef __SYCL_DEVICE_ONLY__ + ptrdiff_t offset = sycl::detail::get_local_linear_id(g); + ptrdiff_t stride = sycl::detail::get_local_linear_range(g); + ptrdiff_t N = last - first; + auto roundup = [=](const ptrdiff_t &v, + const ptrdiff_t &divisor) -> ptrdiff_t { + return ((v + divisor - 1) / divisor) * divisor; + }; + typename std::remove_const::type>::type + x; + typename detail::remove_pointer::type carry = init; + for (ptrdiff_t chunk = 0; chunk < roundup(N, stride); chunk += stride) { + ptrdiff_t i = chunk + offset; + if (i < N) { + x = first[i]; + } + typename detail::remove_pointer::type out = + exclusive_scan_over_group(g, x, carry, binary_op); + if (i < N) { + result[i] = out; + } + carry = group_broadcast(g, binary_op(out, x), stride - 1); + } + return result + N; +#else + (void)g; + (void)last; + (void)result; + (void)init; + throw runtime_error("Group algorithms are not supported on host device.", + PI_INVALID_DEVICE); +#endif +} + +template +detail::enable_if_t< + (is_group_v> && detail::is_pointer::value && + detail::is_pointer::value && + detail::is_complex::type>::value && + detail::is_native_op< + typename detail::remove_pointer::type, + sycl::plus::type>>::value), + OutPtr> +joint_exclusive_scan( + Group g, InPtr first, InPtr last, OutPtr result, + sycl::plus::type> binary_op) { + using T = typename detail::remove_pointer::type; + T init{0, 0}; + return joint_exclusive_scan(g, first, last, result, init, binary_op); +} + // ---- inclusive_scan_over_group template detail::enable_if_t<(is_group_v> && From ca53732dfc6fadce9e8d2380612baf06c8e9efbb Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Wed, 26 Jan 2022 10:48:46 -0800 Subject: [PATCH 05/13] inclusive_scan_over_group and joint_inclusive_scan Signed-off-by: Chris Perkins --- sycl/include/CL/sycl/group_algorithm.hpp | 106 +++++++++++++++++++++-- 1 file changed, 98 insertions(+), 8 deletions(-) diff --git a/sycl/include/CL/sycl/group_algorithm.hpp b/sycl/include/CL/sycl/group_algorithm.hpp index 3da749447df90..5e3d28345ae8e 100644 --- a/sycl/include/CL/sycl/group_algorithm.hpp +++ b/sycl/include/CL/sycl/group_algorithm.hpp @@ -590,9 +590,8 @@ group_broadcast(Group g, T x) { // ---- exclusive_scan_over_group // this function has two overloads, one with three arguments and one with four -// (init) -// the three argument version is specialized thrice: scalar, complex and -// vector +// the three argument version is specialized thrice: scalar, complex, and +// vector template detail::enable_if_t<(is_group_v> && detail::is_scalar_arithmetic::value && @@ -860,6 +859,9 @@ joint_exclusive_scan( } // ---- inclusive_scan_over_group +// this function has two overloads, one with three arguments and one with four +// the three argument version is specialized thrice: vector, scalar, and +// complex template detail::enable_if_t<(is_group_v> && detail::is_vector_arithmetic::value && @@ -901,13 +903,34 @@ inclusive_scan_over_group(Group, T x, BinaryOperation binary_op) { #endif } -template +// complex specializaiton +template detail::enable_if_t<(is_group_v> && - detail::is_scalar_arithmetic::value && - detail::is_scalar_arithmetic::value && - detail::is_native_op::value && - detail::is_native_op::value), + detail::is_complex::value && + detail::is_native_op>::value), T> +inclusive_scan_over_group(Group g, T x, BinaryOperation binary_op) { +#ifdef __SYCL_DEVICE_ONLY__ + T result; + result.real(inclusive_scan_over_group(g, x.real(), sycl::plus<>())); + result.imag(inclusive_scan_over_group(g, x.imag(), sycl::plus<>())); + return result; +#else + throw runtime_error("Group algorithms are not supported on host device.", + PI_INVALID_DEVICE); +#endif +} + +// four argument version of exclusive_scan_over_group is specialized twice +// once for (scalar_arithmetic || complex) and once for vector_arithmetic +template +detail::enable_if_t< + (is_group_v> && + (detail::is_scalar_arithmetic::value || detail::is_complex::value) && + (detail::is_scalar_arithmetic::value || detail::is_complex::value) && + detail::is_native_op::value && + detail::is_native_op::value), + T> inclusive_scan_over_group(Group g, V x, BinaryOperation binary_op, T init) { // FIXME: Do not special-case for half precision static_assert(std::is_same::value || @@ -948,6 +971,9 @@ inclusive_scan_over_group(Group g, V x, BinaryOperation binary_op, T init) { } // ---- joint_inclusive_scan +// has two overloads: 5 arguments and 6 arguments, +// each is specialized twice: is_arithmetic and is_complex. + template detail::enable_if_t< @@ -1027,6 +1053,70 @@ joint_inclusive_scan(Group g, InPtr first, InPtr last, OutPtr result, typename detail::remove_pointer::type>); } +// complex specializations +template +detail::enable_if_t< + (is_group_v> && detail::is_pointer::value && + detail::is_pointer::value && + detail::is_complex::type>::value && + detail::is_complex::value && + detail::is_native_op< + typename detail::remove_pointer::type, + sycl::plus::type>>::value && + detail::is_native_op>::value), + OutPtr> +joint_inclusive_scan(Group g, InPtr first, InPtr last, OutPtr result, + sycl::plus binary_op, T init) { +#ifdef __SYCL_DEVICE_ONLY__ + ptrdiff_t offset = sycl::detail::get_local_linear_id(g); + ptrdiff_t stride = sycl::detail::get_local_linear_range(g); + ptrdiff_t N = last - first; + auto roundup = [=](const ptrdiff_t &v, + const ptrdiff_t &divisor) -> ptrdiff_t { + return ((v + divisor - 1) / divisor) * divisor; + }; + typename std::remove_const::type>::type + x; + typename detail::remove_pointer::type carry = init; + for (ptrdiff_t chunk = 0; chunk < roundup(N, stride); chunk += stride) { + ptrdiff_t i = chunk + offset; + if (i < N) { + x = first[i]; + } + typename detail::remove_pointer::type out = + inclusive_scan_over_group(g, x, binary_op, carry); + if (i < N) { + result[i] = out; + } + carry = group_broadcast(g, out, stride - 1); + } + return result + N; +#else + (void)g; + (void)last; + (void)result; + throw runtime_error("Group algorithms are not supported on host device.", + PI_INVALID_DEVICE); +#endif +} + +template +detail::enable_if_t< + (is_group_v> && detail::is_pointer::value && + detail::is_pointer::value && + detail::is_complex::type>::value && + detail::is_native_op< + typename detail::remove_pointer::type, + sycl::plus::type>>::value), + OutPtr> +joint_inclusive_scan( + Group g, InPtr first, InPtr last, OutPtr result, + sycl::plus::type> binary_op) { + using T = typename detail::remove_pointer::type; + T init{0, 0}; + return joint_inclusive_scan(g, first, last, result, binary_op, init); +} + namespace detail { template struct group_barrier_scope {}; template <> struct group_barrier_scope { From 6a1cfd208de5c23c34cbf110cc78aca425c1401a Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Wed, 26 Jan 2022 11:45:10 -0800 Subject: [PATCH 06/13] ensure unused var warning is avoided Signed-off-by: Chris Perkins --- sycl/include/CL/sycl/group_algorithm.hpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/sycl/include/CL/sycl/group_algorithm.hpp b/sycl/include/CL/sycl/group_algorithm.hpp index 5e3d28345ae8e..8202bf2a7f962 100644 --- a/sycl/include/CL/sycl/group_algorithm.hpp +++ b/sycl/include/CL/sycl/group_algorithm.hpp @@ -166,6 +166,9 @@ reduce_over_group(Group g, T x, sycl::plus binary_op) { result.imag(reduce_over_group(g, x.imag(), sycl::plus<>())); return result; #else + (void)g; + (void)x; + (void)binary_op; throw runtime_error("Group algorithms are not supported on host device.", PI_INVALID_DEVICE); #endif @@ -259,6 +262,7 @@ joint_reduce(Group g, Ptr first, Ptr last, BinaryOperation binary_op) { return joint_reduce(g, first, last, init, binary_op); #else (void)g; + (void)first; (void)last; (void)binary_op; throw runtime_error("Group algorithms are not supported on host device.", @@ -312,6 +316,7 @@ joint_reduce(Group g, Ptr first, Ptr last, return joint_reduce(g, first, last, init, binary_op); #else (void)g; + (void)first; (void)last; (void)binary_op; throw runtime_error("Group algorithms are not supported on host device.", @@ -338,7 +343,10 @@ joint_reduce(Group g, Ptr first, Ptr last, T init, sycl::plus binary_op) { return reduce_over_group(g, partial, init, binary_op); #else (void)g; + (void)first; (void)last; + (void)init; + (void)binary_op; throw runtime_error("Group algorithms are not supported on host device.", PI_INVALID_DEVICE); #endif @@ -627,6 +635,9 @@ exclusive_scan_over_group(Group g, T x, sycl::plus binary_op) { result.imag(exclusive_scan_over_group(g, x.imag(), sycl::plus<>())); return result; #else + (void)g; + (void)x; + (void)binary_op; throw runtime_error("Group algorithms are not supported on host device.", PI_INVALID_DEVICE); #endif @@ -833,9 +844,11 @@ joint_exclusive_scan(Group g, InPtr first, InPtr last, OutPtr result, T init, return result + N; #else (void)g; + (void)first; (void)last; (void)result; (void)init; + (void)binary_op; throw runtime_error("Group algorithms are not supported on host device.", PI_INVALID_DEVICE); #endif @@ -916,6 +929,9 @@ inclusive_scan_over_group(Group g, T x, BinaryOperation binary_op) { result.imag(inclusive_scan_over_group(g, x.imag(), sycl::plus<>())); return result; #else + (void)g; + (void)x; + (void)binary_op; throw runtime_error("Group algorithms are not supported on host device.", PI_INVALID_DEVICE); #endif @@ -1093,8 +1109,11 @@ joint_inclusive_scan(Group g, InPtr first, InPtr last, OutPtr result, return result + N; #else (void)g; + (void)first; (void)last; (void)result; + (void)binary_op; + (void)init; throw runtime_error("Group algorithms are not supported on host device.", PI_INVALID_DEVICE); #endif From 3e559f9b38b39178cd0bb25da39e0c2b1f2fc423 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Wed, 26 Jan 2022 13:52:43 -0800 Subject: [PATCH 07/13] straight up specialization of the joint_X functions produces too much code duplication. Removed duplicates with a small bit of SFINAE instead. Signed-off-by: Chris Perkins --- sycl/include/CL/sycl/group_algorithm.hpp | 252 ++++------------------- 1 file changed, 42 insertions(+), 210 deletions(-) diff --git a/sycl/include/CL/sycl/group_algorithm.hpp b/sycl/include/CL/sycl/group_algorithm.hpp index 8202bf2a7f962..f3a8f49d77709 100644 --- a/sycl/include/CL/sycl/group_algorithm.hpp +++ b/sycl/include/CL/sycl/group_algorithm.hpp @@ -105,6 +105,29 @@ template struct is_native_op { template struct is_complex : std::bool_constant> || std::is_same_v> || std::is_same_v>> {}; +// ---- is_arithmetic_or_complex +template +using is_arithmetic_or_complex = + std::bool_constant::value || + sycl::detail::is_arithmetic::value>; + +// ---- identity_for_ga_op +// the group algorithms support std::complex, limited to sycl::plus operation +// get the correct identity for group algorithm operation. +template +constexpr detail::enable_if_t< + (is_complex::value && + std::is_same>::value), + T> +identity_for_ga_op() { + return {0, 0}; +} + +template +constexpr detail::enable_if_t::value, T> identity_for_ga_op() { + return sycl::known_identity_v; +} + // ---- for_each template Function for_each(Group g, Ptr first, Ptr last, Function f) { @@ -249,16 +272,16 @@ reduce_over_group(Group g, V x, T init, BinaryOperation binary_op) { } // ---- joint_reduce -// specialized for is_arithmetic (both scalar and vector) and complex template -detail::enable_if_t< - (is_group_v> && detail::is_pointer::value && - detail::is_arithmetic::type>::value), - typename detail::remove_pointer::type> +detail::enable_if_t<(is_group_v> && + detail::is_pointer::value && + detail::is_arithmetic_or_complex< + typename detail::remove_pointer::type>::value), + typename detail::remove_pointer::type> joint_reduce(Group g, Ptr first, Ptr last, BinaryOperation binary_op) { #ifdef __SYCL_DEVICE_ONLY__ using T = typename detail::remove_pointer::type; - T init = sycl::known_identity_v; + T init = detail::identity_for_ga_op(); return joint_reduce(g, first, last, init, binary_op); #else (void)g; @@ -273,8 +296,9 @@ joint_reduce(Group g, Ptr first, Ptr last, BinaryOperation binary_op) { template detail::enable_if_t< (is_group_v> && detail::is_pointer::value && - detail::is_arithmetic::type>::value && - detail::is_arithmetic::value && + detail::is_arithmetic_or_complex< + typename detail::remove_pointer::type>::value && + detail::is_arithmetic_or_complex::value && detail::is_native_op::type, BinaryOperation>::value && detail::is_native_op::value), @@ -287,7 +311,8 @@ joint_reduce(Group g, Ptr first, Ptr last, T init, BinaryOperation binary_op) { std::is_same::value), "Result type of binary_op must match reduction accumulation type."); #ifdef __SYCL_DEVICE_ONLY__ - T partial = sycl::known_identity_v; + // T partial = sycl::known_identity_v; + T partial = detail::identity_for_ga_op(); sycl::detail::for_each( g, first, last, [&](const typename detail::remove_pointer::type &x) { partial = binary_op(partial, x); @@ -301,57 +326,6 @@ joint_reduce(Group g, Ptr first, Ptr last, T init, BinaryOperation binary_op) { #endif } -// specializations of joint_reduce for complex, limited to sycl::plus operation. -// T will be std::complex or similar -template -detail::enable_if_t< - (is_group_v> && detail::is_pointer::value && - detail::is_complex::type>::value), - typename detail::remove_pointer::type> -joint_reduce(Group g, Ptr first, Ptr last, - sycl::plus::type> binary_op) { -#ifdef __SYCL_DEVICE_ONLY__ - using T = typename detail::remove_pointer::type; - T init{0, 0}; - return joint_reduce(g, first, last, init, binary_op); -#else - (void)g; - (void)first; - (void)last; - (void)binary_op; - throw runtime_error("Group algorithms are not supported on host device.", - PI_INVALID_DEVICE); -#endif -} - -template -detail::enable_if_t< - (is_group_v> && detail::is_pointer::value && - detail::is_complex::type>::value && - detail::is_complex::value && - detail::is_native_op::type, - sycl::plus>::value && - detail::is_native_op>::value), - T> -joint_reduce(Group g, Ptr first, Ptr last, T init, sycl::plus binary_op) { -#ifdef __SYCL_DEVICE_ONLY__ - T partial{0, 0}; - sycl::detail::for_each( - g, first, last, [&](const typename detail::remove_pointer::type &x) { - partial += x; - }); - return reduce_over_group(g, partial, init, binary_op); -#else - (void)g; - (void)first; - (void)last; - (void)init; - (void)binary_op; - throw runtime_error("Group algorithms are not supported on host device.", - PI_INVALID_DEVICE); -#endif -} - // ---- any_of_group template detail::enable_if_t>, bool> @@ -720,17 +694,14 @@ exclusive_scan_over_group(Group g, V x, T init, BinaryOperation binary_op) { } // ---- joint_exclusive_scan -// has two overloads: 5 arguments and 6 arguments, -// each is specialized twice: is_arithmetic and is_complex. - template detail::enable_if_t< (is_group_v> && detail::is_pointer::value && detail::is_pointer::value && - detail::is_arithmetic< + detail::is_arithmetic_or_complex< typename detail::remove_pointer::type>::value && - detail::is_arithmetic::value && + detail::is_arithmetic_or_complex::value && detail::is_native_op::type, BinaryOperation>::value && detail::is_native_op::value), @@ -782,7 +753,7 @@ template > && detail::is_pointer::value && detail::is_pointer::value && - detail::is_arithmetic< + detail::is_arithmetic_or_complex< typename detail::remove_pointer::type>::value && detail::is_native_op::type, BinaryOperation>::value), @@ -797,77 +768,8 @@ joint_exclusive_scan(Group g, InPtr first, InPtr last, OutPtr result, half>::value && std::is_same::value), "Result type of binary_op must match scan accumulation type."); - return joint_exclusive_scan( - g, first, last, result, - sycl::known_identity_v::type>, - binary_op); -} - -// specialized for complex -template -detail::enable_if_t< - (is_group_v> && detail::is_pointer::value && - detail::is_pointer::value && - detail::is_complex::type>::value && - detail::is_complex::value && - detail::is_native_op< - typename detail::remove_pointer::type, - sycl::plus::type>>::value && - detail::is_native_op>::value), - OutPtr> -joint_exclusive_scan(Group g, InPtr first, InPtr last, OutPtr result, T init, - sycl::plus binary_op) { -#ifdef __SYCL_DEVICE_ONLY__ - ptrdiff_t offset = sycl::detail::get_local_linear_id(g); - ptrdiff_t stride = sycl::detail::get_local_linear_range(g); - ptrdiff_t N = last - first; - auto roundup = [=](const ptrdiff_t &v, - const ptrdiff_t &divisor) -> ptrdiff_t { - return ((v + divisor - 1) / divisor) * divisor; - }; - typename std::remove_const::type>::type - x; - typename detail::remove_pointer::type carry = init; - for (ptrdiff_t chunk = 0; chunk < roundup(N, stride); chunk += stride) { - ptrdiff_t i = chunk + offset; - if (i < N) { - x = first[i]; - } - typename detail::remove_pointer::type out = - exclusive_scan_over_group(g, x, carry, binary_op); - if (i < N) { - result[i] = out; - } - carry = group_broadcast(g, binary_op(out, x), stride - 1); - } - return result + N; -#else - (void)g; - (void)first; - (void)last; - (void)result; - (void)init; - (void)binary_op; - throw runtime_error("Group algorithms are not supported on host device.", - PI_INVALID_DEVICE); -#endif -} - -template -detail::enable_if_t< - (is_group_v> && detail::is_pointer::value && - detail::is_pointer::value && - detail::is_complex::type>::value && - detail::is_native_op< - typename detail::remove_pointer::type, - sycl::plus::type>>::value), - OutPtr> -joint_exclusive_scan( - Group g, InPtr first, InPtr last, OutPtr result, - sycl::plus::type> binary_op) { using T = typename detail::remove_pointer::type; - T init{0, 0}; + T init = detail::identity_for_ga_op(); return joint_exclusive_scan(g, first, last, result, init, binary_op); } @@ -987,17 +889,14 @@ inclusive_scan_over_group(Group g, V x, BinaryOperation binary_op, T init) { } // ---- joint_inclusive_scan -// has two overloads: 5 arguments and 6 arguments, -// each is specialized twice: is_arithmetic and is_complex. - template detail::enable_if_t< (is_group_v> && detail::is_pointer::value && detail::is_pointer::value && - detail::is_arithmetic< + detail::is_arithmetic_or_complex< typename detail::remove_pointer::type>::value && - detail::is_arithmetic::value && + detail::is_arithmetic_or_complex::value && detail::is_native_op::type, BinaryOperation>::value && detail::is_native_op::value), @@ -1048,7 +947,7 @@ template > && detail::is_pointer::value && detail::is_pointer::value && - detail::is_arithmetic< + detail::is_arithmetic_or_complex< typename detail::remove_pointer::type>::value && detail::is_native_op::type, BinaryOperation>::value), @@ -1063,76 +962,9 @@ joint_inclusive_scan(Group g, InPtr first, InPtr last, OutPtr result, half>::value && std::is_same::value), "Result type of binary_op must match scan accumulation type."); - return joint_inclusive_scan( - g, first, last, result, binary_op, - sycl::known_identity_v::type>); -} -// complex specializations -template -detail::enable_if_t< - (is_group_v> && detail::is_pointer::value && - detail::is_pointer::value && - detail::is_complex::type>::value && - detail::is_complex::value && - detail::is_native_op< - typename detail::remove_pointer::type, - sycl::plus::type>>::value && - detail::is_native_op>::value), - OutPtr> -joint_inclusive_scan(Group g, InPtr first, InPtr last, OutPtr result, - sycl::plus binary_op, T init) { -#ifdef __SYCL_DEVICE_ONLY__ - ptrdiff_t offset = sycl::detail::get_local_linear_id(g); - ptrdiff_t stride = sycl::detail::get_local_linear_range(g); - ptrdiff_t N = last - first; - auto roundup = [=](const ptrdiff_t &v, - const ptrdiff_t &divisor) -> ptrdiff_t { - return ((v + divisor - 1) / divisor) * divisor; - }; - typename std::remove_const::type>::type - x; - typename detail::remove_pointer::type carry = init; - for (ptrdiff_t chunk = 0; chunk < roundup(N, stride); chunk += stride) { - ptrdiff_t i = chunk + offset; - if (i < N) { - x = first[i]; - } - typename detail::remove_pointer::type out = - inclusive_scan_over_group(g, x, binary_op, carry); - if (i < N) { - result[i] = out; - } - carry = group_broadcast(g, out, stride - 1); - } - return result + N; -#else - (void)g; - (void)first; - (void)last; - (void)result; - (void)binary_op; - (void)init; - throw runtime_error("Group algorithms are not supported on host device.", - PI_INVALID_DEVICE); -#endif -} - -template -detail::enable_if_t< - (is_group_v> && detail::is_pointer::value && - detail::is_pointer::value && - detail::is_complex::type>::value && - detail::is_native_op< - typename detail::remove_pointer::type, - sycl::plus::type>>::value), - OutPtr> -joint_inclusive_scan( - Group g, InPtr first, InPtr last, OutPtr result, - sycl::plus::type> binary_op) { using T = typename detail::remove_pointer::type; - T init{0, 0}; + T init = detail::identity_for_ga_op(); return joint_inclusive_scan(g, first, last, result, binary_op, init); } From b5f68f2c02c2b5d94fafd964cb2f87e0d9061d52 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Wed, 26 Jan 2022 14:13:06 -0800 Subject: [PATCH 08/13] clang-format expresses its disdain Signed-off-by: Chris Perkins --- sycl/include/CL/sycl/group_algorithm.hpp | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/sycl/include/CL/sycl/group_algorithm.hpp b/sycl/include/CL/sycl/group_algorithm.hpp index f3a8f49d77709..48a56d69f74b4 100644 --- a/sycl/include/CL/sycl/group_algorithm.hpp +++ b/sycl/include/CL/sycl/group_algorithm.hpp @@ -103,7 +103,10 @@ template struct is_native_op { // ---- is_complex template -struct is_complex : std::bool_constant> || std::is_same_v> || std::is_same_v>> {}; +struct is_complex + : std::bool_constant> || + std::is_same_v> || + std::is_same_v>> {}; // ---- is_arithmetic_or_complex template @@ -151,7 +154,7 @@ Function for_each(Group g, Ptr first, Ptr last, Function f) { // ---- reduce_over_group // three argument variant is specialized thrice: -// scalar arithmetic, complex (plus only), and vector arithmetic (sycl::vec) +// scalar arithmetic, complex (plus only), and vector arithmetic template detail::enable_if_t<(is_group_v> && @@ -220,12 +223,13 @@ reduce_over_group(Group g, T x, BinaryOperation binary_op) { // four argument variant of reduce_over_group specialized twice // (scalar arithmetic || complex), and vector_arithmetic template -detail::enable_if_t<(is_group_v> && - (detail::is_scalar_arithmetic::value || detail::is_complex::value) && - (detail::is_scalar_arithmetic::value || detail::is_complex::value) && - detail::is_native_op::value && - detail::is_native_op::value), - T> +detail::enable_if_t< + (is_group_v> && + (detail::is_scalar_arithmetic::value || detail::is_complex::value) && + (detail::is_scalar_arithmetic::value || detail::is_complex::value) && + detail::is_native_op::value && + detail::is_native_op::value), + T> reduce_over_group(Group g, V x, T init, BinaryOperation binary_op) { // FIXME: Do not special-case for half precision static_assert( @@ -242,7 +246,6 @@ reduce_over_group(Group g, V x, T init, BinaryOperation binary_op) { #endif } - template detail::enable_if_t<(is_group_v> && detail::is_vector_arithmetic::value && @@ -311,7 +314,6 @@ joint_reduce(Group g, Ptr first, Ptr last, T init, BinaryOperation binary_op) { std::is_same::value), "Result type of binary_op must match reduction accumulation type."); #ifdef __SYCL_DEVICE_ONLY__ - // T partial = sycl::known_identity_v; T partial = detail::identity_for_ga_op(); sycl::detail::for_each( g, first, last, [&](const typename detail::remove_pointer::type &x) { From 8862baec7274836c40799ce38c0f3e3de90f2d4a Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Thu, 27 Jan 2022 09:52:42 -0800 Subject: [PATCH 09/13] restrain enthusiasm to C++14 Signed-off-by: Chris Perkins --- sycl/include/CL/sycl/group_algorithm.hpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sycl/include/CL/sycl/group_algorithm.hpp b/sycl/include/CL/sycl/group_algorithm.hpp index 48a56d69f74b4..943fa5446c854 100644 --- a/sycl/include/CL/sycl/group_algorithm.hpp +++ b/sycl/include/CL/sycl/group_algorithm.hpp @@ -104,15 +104,16 @@ template struct is_native_op { // ---- is_complex template struct is_complex - : std::bool_constant> || - std::is_same_v> || - std::is_same_v>> {}; + : std::integral_constant< + bool, std::is_same>::value || + std::is_same>::value || + std::is_same>::value> {}; // ---- is_arithmetic_or_complex template using is_arithmetic_or_complex = - std::bool_constant::value || - sycl::detail::is_arithmetic::value>; + std::integral_constant::value || + sycl::detail::is_arithmetic::value>; // ---- identity_for_ga_op // the group algorithms support std::complex, limited to sycl::plus operation From b9852c307dfca8627e01ea5f647968ded25f5300 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Mon, 31 Jan 2022 20:34:45 -0800 Subject: [PATCH 10/13] address reviewer feedback Signed-off-by: Chris Perkins --- sycl/include/CL/sycl/group_algorithm.hpp | 87 +++++++++++++++++------- 1 file changed, 61 insertions(+), 26 deletions(-) diff --git a/sycl/include/CL/sycl/group_algorithm.hpp b/sycl/include/CL/sycl/group_algorithm.hpp index 943fa5446c854..5f75d9e7ea597 100644 --- a/sycl/include/CL/sycl/group_algorithm.hpp +++ b/sycl/include/CL/sycl/group_algorithm.hpp @@ -101,28 +101,39 @@ template struct is_native_op { is_contained>::value; }; +// ---- is_plus +template +using is_plus = std::integral_constant< + bool, std::is_same>::value || + std::is_same>::value>; + // ---- is_complex +// NOTE: std::complex not yet supported by group algorithms. template struct is_complex - : std::integral_constant< - bool, std::is_same>::value || - std::is_same>::value || - std::is_same>::value> {}; + : std::integral_constant>::value || + std::is_same>::value> { +}; // ---- is_arithmetic_or_complex template using is_arithmetic_or_complex = std::integral_constant::value || sycl::detail::is_arithmetic::value>; +// ---- is_plus_if_complex +template +using is_plus_if_complex = + std::integral_constant::value + ? is_plus::value + : std::true_type::value)>; // ---- identity_for_ga_op // the group algorithms support std::complex, limited to sycl::plus operation // get the correct identity for group algorithm operation. template constexpr detail::enable_if_t< - (is_complex::value && - std::is_same>::value), - T> + (is_complex::value && is_plus::value), T> identity_for_ga_op() { return {0, 0}; } @@ -181,12 +192,13 @@ reduce_over_group(Group, T x, BinaryOperation binary_op) { // complex specialization. T is std::complex or similar. // binary op is sycl::plus> -template +template detail::enable_if_t<(is_group_v> && detail::is_complex::value && - detail::is_native_op>::value), + detail::is_native_op>::value && + detail::is_plus::value), T> -reduce_over_group(Group g, T x, sycl::plus binary_op) { +reduce_over_group(Group g, T x, BinaryOperation binary_op) { #ifdef __SYCL_DEVICE_ONLY__ T result; result.real(reduce_over_group(g, x.real(), sycl::plus<>())); @@ -229,7 +241,9 @@ detail::enable_if_t< (detail::is_scalar_arithmetic::value || detail::is_complex::value) && (detail::is_scalar_arithmetic::value || detail::is_complex::value) && detail::is_native_op::value && - detail::is_native_op::value), + detail::is_native_op::value && + detail::is_plus_if_complex::value && + detail::is_plus_if_complex::value), T> reduce_over_group(Group g, V x, T init, BinaryOperation binary_op) { // FIXME: Do not special-case for half precision @@ -277,11 +291,13 @@ reduce_over_group(Group g, V x, T init, BinaryOperation binary_op) { // ---- joint_reduce template -detail::enable_if_t<(is_group_v> && - detail::is_pointer::value && - detail::is_arithmetic_or_complex< - typename detail::remove_pointer::type>::value), - typename detail::remove_pointer::type> +detail::enable_if_t< + (is_group_v> && detail::is_pointer::value && + detail::is_arithmetic_or_complex< + typename detail::remove_pointer::type>::value && + detail::is_plus_if_complex::type, + BinaryOperation>::value), + typename detail::remove_pointer::type> joint_reduce(Group g, Ptr first, Ptr last, BinaryOperation binary_op) { #ifdef __SYCL_DEVICE_ONLY__ using T = typename detail::remove_pointer::type; @@ -305,6 +321,9 @@ detail::enable_if_t< detail::is_arithmetic_or_complex::value && detail::is_native_op::type, BinaryOperation>::value && + detail::is_plus_if_complex::type, + BinaryOperation>::value && + detail::is_plus_if_complex::value && detail::is_native_op::value), T> joint_reduce(Group g, Ptr first, Ptr last, T init, BinaryOperation binary_op) { @@ -600,12 +619,13 @@ exclusive_scan_over_group(Group, T x, BinaryOperation binary_op) { // complex specialization. T is std::complex or similar. // binary op is sycl::plus> -template +template detail::enable_if_t<(is_group_v> && detail::is_complex::value && - detail::is_native_op>::value), + detail::is_native_op>::value && + detail::is_plus::value), T> -exclusive_scan_over_group(Group g, T x, sycl::plus binary_op) { +exclusive_scan_over_group(Group g, T x, BinaryOperation binary_op) { #ifdef __SYCL_DEVICE_ONLY__ T result; result.real(exclusive_scan_over_group(g, x.real(), sycl::plus<>())); @@ -670,7 +690,9 @@ detail::enable_if_t< (detail::is_scalar_arithmetic::value || detail::is_complex::value) && (detail::is_scalar_arithmetic::value || detail::is_complex::value) && detail::is_native_op::value && - detail::is_native_op::value), + detail::is_native_op::value && + detail::is_plus_if_complex::value && + detail::is_plus_if_complex::value), T> exclusive_scan_over_group(Group g, V x, T init, BinaryOperation binary_op) { // FIXME: Do not special-case for half precision @@ -707,7 +729,10 @@ detail::enable_if_t< detail::is_arithmetic_or_complex::value && detail::is_native_op::type, BinaryOperation>::value && - detail::is_native_op::value), + detail::is_native_op::value && + detail::is_plus_if_complex::type, + BinaryOperation>::value && + detail::is_plus_if_complex::value), OutPtr> joint_exclusive_scan(Group g, InPtr first, InPtr last, OutPtr result, T init, BinaryOperation binary_op) { @@ -759,7 +784,9 @@ detail::enable_if_t< detail::is_arithmetic_or_complex< typename detail::remove_pointer::type>::value && detail::is_native_op::type, - BinaryOperation>::value), + BinaryOperation>::value && + detail::is_plus_if_complex::type, + BinaryOperation>::value), OutPtr> joint_exclusive_scan(Group g, InPtr first, InPtr last, OutPtr result, BinaryOperation binary_op) { @@ -825,7 +852,8 @@ inclusive_scan_over_group(Group, T x, BinaryOperation binary_op) { template detail::enable_if_t<(is_group_v> && detail::is_complex::value && - detail::is_native_op>::value), + detail::is_native_op>::value && + detail::is_plus::value), T> inclusive_scan_over_group(Group g, T x, BinaryOperation binary_op) { #ifdef __SYCL_DEVICE_ONLY__ @@ -850,7 +878,9 @@ detail::enable_if_t< (detail::is_scalar_arithmetic::value || detail::is_complex::value) && (detail::is_scalar_arithmetic::value || detail::is_complex::value) && detail::is_native_op::value && - detail::is_native_op::value), + detail::is_native_op::value && + detail::is_plus_if_complex::value && + detail::is_plus_if_complex::value), T> inclusive_scan_over_group(Group g, V x, BinaryOperation binary_op, T init) { // FIXME: Do not special-case for half precision @@ -902,7 +932,10 @@ detail::enable_if_t< detail::is_arithmetic_or_complex::value && detail::is_native_op::type, BinaryOperation>::value && - detail::is_native_op::value), + detail::is_native_op::value && + detail::is_plus_if_complex::type, + BinaryOperation>::value && + detail::is_plus_if_complex::value), OutPtr> joint_inclusive_scan(Group g, InPtr first, InPtr last, OutPtr result, BinaryOperation binary_op, T init) { @@ -953,7 +986,9 @@ detail::enable_if_t< detail::is_arithmetic_or_complex< typename detail::remove_pointer::type>::value && detail::is_native_op::type, - BinaryOperation>::value), + BinaryOperation>::value && + detail::is_plus_if_complex::type, + BinaryOperation>::value), OutPtr> joint_inclusive_scan(Group g, InPtr first, InPtr last, OutPtr result, BinaryOperation binary_op) { From 5a4990cdc66cd4d8acaa26d7f0cb407cb590b235 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Mon, 31 Jan 2022 20:45:00 -0800 Subject: [PATCH 11/13] fix comment Signed-off-by: Chris Perkins --- sycl/include/CL/sycl/group_algorithm.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/CL/sycl/group_algorithm.hpp b/sycl/include/CL/sycl/group_algorithm.hpp index 5f75d9e7ea597..5a06308adec6a 100644 --- a/sycl/include/CL/sycl/group_algorithm.hpp +++ b/sycl/include/CL/sycl/group_algorithm.hpp @@ -870,7 +870,7 @@ inclusive_scan_over_group(Group g, T x, BinaryOperation binary_op) { #endif } -// four argument version of exclusive_scan_over_group is specialized twice +// four argument version of inclusive_scan_over_group is specialized twice // once for (scalar_arithmetic || complex) and once for vector_arithmetic template detail::enable_if_t< From 9823b32ddd4c9f2c785062d098898e3b8a1c592f Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Tue, 1 Feb 2022 09:46:50 -0800 Subject: [PATCH 12/13] move SYCL_EXT_ define to feature_test.hpp.in Signed-off-by: Chris Perkins --- sycl/include/CL/sycl/feature_test.hpp.in | 7 +++++++ sycl/include/CL/sycl/group_algorithm.hpp | 2 -- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/sycl/include/CL/sycl/feature_test.hpp.in b/sycl/include/CL/sycl/feature_test.hpp.in index e6053ebf4ff1c..44962a4dde012 100644 --- a/sycl/include/CL/sycl/feature_test.hpp.in +++ b/sycl/include/CL/sycl/feature_test.hpp.in @@ -5,6 +5,12 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // // ===--------------------------------------------------------------------=== // + +// +// IMPORTANT: feature_test.hpp is a generated file - DO NOT EDIT +// original definitions are in feature_test.hpp.in +// + #pragma once #include @@ -35,6 +41,7 @@ namespace sycl { #define SYCL_EXT_ONEAPI_MATRIX 2 #endif #define SYCL_EXT_ONEAPI_ASSERT 1 +#define SYCL_EXT_ONEAPI_COMPLEX_ALGORITHMS 1 #define SYCL_EXT_ONEAPI_DISCARD_QUEUE_EVENTS 1 #define SYCL_EXT_ONEAPI_ENQUEUE_BARRIER 1 #define SYCL_EXT_ONEAPI_FREE_FUNCTION_QUERIES 1 diff --git a/sycl/include/CL/sycl/group_algorithm.hpp b/sycl/include/CL/sycl/group_algorithm.hpp index 5a06308adec6a..985688a9908df 100644 --- a/sycl/include/CL/sycl/group_algorithm.hpp +++ b/sycl/include/CL/sycl/group_algorithm.hpp @@ -22,8 +22,6 @@ #include #include -#define SYCL_EXT_ONEAPI_COMPLEX_ALGORITHMS 1 - __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { From a30df28d365ea3e929743573a57c05441b1ffd0d Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Fri, 4 Feb 2022 09:08:30 -0800 Subject: [PATCH 13/13] comment on identity_for_ga_aop Signed-off-by: Chris Perkins --- sycl/include/CL/sycl/group_algorithm.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sycl/include/CL/sycl/group_algorithm.hpp b/sycl/include/CL/sycl/group_algorithm.hpp index 985688a9908df..bc3a1d9834edf 100644 --- a/sycl/include/CL/sycl/group_algorithm.hpp +++ b/sycl/include/CL/sycl/group_algorithm.hpp @@ -129,6 +129,8 @@ using is_plus_if_complex = // ---- identity_for_ga_op // the group algorithms support std::complex, limited to sycl::plus operation // get the correct identity for group algorithm operation. +// TODO: identiy_for_ga_op should be replaced with known_identity once the other +// callers of known_identity support complex numbers. template constexpr detail::enable_if_t< (is_complex::value && is_plus::value), T>