diff --git a/sycl/include/sycl/builtins.hpp b/sycl/include/sycl/builtins.hpp index bf08b5c3759a8..d5505b43c7340 100644 --- a/sycl/include/sycl/builtins.hpp +++ b/sycl/include/sycl/builtins.hpp @@ -10,11 +10,20 @@ #include +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES + // Include the generated builtins. #include #include #include +#else // __INTEL_PREVIEW_BREAKING_CHANGES + +#include +#include + +#endif // __INTEL_PREVIEW_BREAKING_CHANGES + #ifdef __SYCL_DEVICE_ONLY__ extern "C" { diff --git a/sycl/include/sycl/builtins_legacy_marray_vec.hpp b/sycl/include/sycl/builtins_legacy_marray_vec.hpp new file mode 100644 index 0000000000000..87917e047cefd --- /dev/null +++ b/sycl/include/sycl/builtins_legacy_marray_vec.hpp @@ -0,0 +1,1291 @@ +//==--- builtins_legacy_marray_vec.hpp - Old SYCL built-in nd definitions --==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES +#error "Legacy builtins must not be used in preview." +#endif + +#pragma once + +#include // for address_space, decorated +#include // for half +#include // for scalar builtin variants +#include // for to_vec, to_marray... +#include // for Boolean +#include // for __invoke_select, __in... +#include // for __SYCL_ALWAYS_INLINE +#include // for is_svgenfloat, is_sge... +#include // for is_contained, type_list +#include // for make_larger_t, marray... +#include // for half, intel +#include // for marray +#include // for address_space_cast +#include // for vec + +namespace sycl { +inline namespace _V1 { + +#ifdef __SYCL_DEVICE_ONLY__ +#define __sycl_std +#else +namespace __sycl_std = __host_std; +#endif + +#ifdef __FAST_MATH__ +#define __FAST_MATH_GENFLOAT(T) \ + (detail::is_svgenfloatd_v || detail::is_svgenfloath_v) +#define __FAST_MATH_SGENFLOAT(T) \ + (std::is_same_v || std::is_same_v) +#else +#define __FAST_MATH_GENFLOAT(T) (detail::is_svgenfloat_v) +#define __FAST_MATH_SGENFLOAT(T) (detail::is_sgenfloat_v) +#endif + +/* ------------------ 4.13.3 Math functions. ---------------------------------*/ +// These macros for marray math function implementations use vectorizations of +// size two as a simple general optimization. A more complex implementation +// using larger vectorizations for large marray sizes is possible; however more +// testing is required in order to ascertain the performance implications for +// all backends. +#define __SYCL_MATH_FUNCTION_OVERLOAD_IMPL(NAME) \ + marray res; \ + for (size_t i = 0; i < N / 2; i++) { \ + vec partial_res = \ + __sycl_std::__invoke_##NAME>(detail::to_vec2(x, i * 2)); \ + std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); \ + } \ + if (N % 2) { \ + res[N - 1] = __sycl_std::__invoke_##NAME(x[N - 1]); \ + } \ + return res; + +#define __SYCL_MATH_FUNCTION_OVERLOAD(NAME) \ + template \ + inline __SYCL_ALWAYS_INLINE \ + std::enable_if_t, marray> \ + NAME(marray x) { \ + __SYCL_MATH_FUNCTION_OVERLOAD_IMPL(NAME) \ + } + +__SYCL_MATH_FUNCTION_OVERLOAD(cospi) +__SYCL_MATH_FUNCTION_OVERLOAD(sinpi) +__SYCL_MATH_FUNCTION_OVERLOAD(tanpi) +__SYCL_MATH_FUNCTION_OVERLOAD(sinh) +__SYCL_MATH_FUNCTION_OVERLOAD(cosh) +__SYCL_MATH_FUNCTION_OVERLOAD(tanh) +__SYCL_MATH_FUNCTION_OVERLOAD(asin) +__SYCL_MATH_FUNCTION_OVERLOAD(acos) +__SYCL_MATH_FUNCTION_OVERLOAD(atan) +__SYCL_MATH_FUNCTION_OVERLOAD(asinpi) +__SYCL_MATH_FUNCTION_OVERLOAD(acospi) +__SYCL_MATH_FUNCTION_OVERLOAD(atanpi) +__SYCL_MATH_FUNCTION_OVERLOAD(asinh) +__SYCL_MATH_FUNCTION_OVERLOAD(acosh) +__SYCL_MATH_FUNCTION_OVERLOAD(atanh) +__SYCL_MATH_FUNCTION_OVERLOAD(cbrt) +__SYCL_MATH_FUNCTION_OVERLOAD(ceil) +__SYCL_MATH_FUNCTION_OVERLOAD(floor) +__SYCL_MATH_FUNCTION_OVERLOAD(erfc) +__SYCL_MATH_FUNCTION_OVERLOAD(erf) +__SYCL_MATH_FUNCTION_OVERLOAD(expm1) +__SYCL_MATH_FUNCTION_OVERLOAD(tgamma) +__SYCL_MATH_FUNCTION_OVERLOAD(lgamma) +__SYCL_MATH_FUNCTION_OVERLOAD(log1p) +__SYCL_MATH_FUNCTION_OVERLOAD(logb) +__SYCL_MATH_FUNCTION_OVERLOAD(rint) +__SYCL_MATH_FUNCTION_OVERLOAD(round) +__SYCL_MATH_FUNCTION_OVERLOAD(trunc) +__SYCL_MATH_FUNCTION_OVERLOAD(fabs) + +#undef __SYCL_MATH_FUNCTION_OVERLOAD + +// __SYCL_MATH_FUNCTION_OVERLOAD_FM cases are replaced by corresponding native +// implementations when the -ffast-math flag is used with float. +#define __SYCL_MATH_FUNCTION_OVERLOAD_FM(NAME) \ + template \ + inline __SYCL_ALWAYS_INLINE \ + std::enable_if_t<__FAST_MATH_SGENFLOAT(T), marray> \ + NAME(marray x) { \ + __SYCL_MATH_FUNCTION_OVERLOAD_IMPL(NAME) \ + } + +__SYCL_MATH_FUNCTION_OVERLOAD_FM(sin) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(cos) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(tan) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(exp) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(exp2) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(exp10) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(log) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(log2) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(log10) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(sqrt) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(rsqrt) + +#undef __SYCL_MATH_FUNCTION_OVERLOAD_FM +#undef __SYCL_MATH_FUNCTION_OVERLOAD_IMPL + +template +inline __SYCL_ALWAYS_INLINE + std::enable_if_t, marray> + ilogb(marray x) { + marray res; + for (size_t i = 0; i < N / 2; i++) { + vec partial_res = + __sycl_std::__invoke_ilogb>(detail::to_vec2(x, i * 2)); + std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); + } + if (N % 2) { + res[N - 1] = __sycl_std::__invoke_ilogb(x[N - 1]); + } + return res; +} + +#define __SYCL_MATH_FUNCTION_2_OVERLOAD_IMPL(NAME) \ + marray res; \ + for (size_t i = 0; i < N / 2; i++) { \ + auto partial_res = __sycl_std::__invoke_##NAME>( \ + detail::to_vec2(x, i * 2), detail::to_vec2(y, i * 2)); \ + std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); \ + } \ + if (N % 2) { \ + res[N - 1] = __sycl_std::__invoke_##NAME(x[N - 1], y[N - 1]); \ + } \ + return res; + +#define __SYCL_MATH_FUNCTION_2_OVERLOAD(NAME) \ + template \ + inline __SYCL_ALWAYS_INLINE \ + std::enable_if_t, marray> \ + NAME(marray x, marray y) { \ + __SYCL_MATH_FUNCTION_2_OVERLOAD_IMPL(NAME) \ + } + +__SYCL_MATH_FUNCTION_2_OVERLOAD(atan2) +__SYCL_MATH_FUNCTION_2_OVERLOAD(atan2pi) +__SYCL_MATH_FUNCTION_2_OVERLOAD(copysign) +__SYCL_MATH_FUNCTION_2_OVERLOAD(fdim) +__SYCL_MATH_FUNCTION_2_OVERLOAD(fmin) +__SYCL_MATH_FUNCTION_2_OVERLOAD(fmax) +__SYCL_MATH_FUNCTION_2_OVERLOAD(fmod) +__SYCL_MATH_FUNCTION_2_OVERLOAD(hypot) +__SYCL_MATH_FUNCTION_2_OVERLOAD(maxmag) +__SYCL_MATH_FUNCTION_2_OVERLOAD(minmag) +__SYCL_MATH_FUNCTION_2_OVERLOAD(nextafter) +__SYCL_MATH_FUNCTION_2_OVERLOAD(pow) +__SYCL_MATH_FUNCTION_2_OVERLOAD(remainder) + +#undef __SYCL_MATH_FUNCTION_2_OVERLOAD + +template +inline __SYCL_ALWAYS_INLINE + std::enable_if_t<__FAST_MATH_SGENFLOAT(T), marray> + powr(marray x, marray y) { + __SYCL_MATH_FUNCTION_2_OVERLOAD_IMPL(powr) +} + +#undef __SYCL_MATH_FUNCTION_2_OVERLOAD_IMPL + +#define __SYCL_MATH_FUNCTION_2_SGENFLOAT_Y_OVERLOAD(NAME) \ + template \ + inline __SYCL_ALWAYS_INLINE \ + std::enable_if_t, marray> \ + NAME(marray x, T y) { \ + marray res; \ + sycl::vec y_vec{y, y}; \ + for (size_t i = 0; i < N / 2; i++) { \ + auto partial_res = __sycl_std::__invoke_##NAME>( \ + detail::to_vec2(x, i * 2), y_vec); \ + std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); \ + } \ + if (N % 2) { \ + res[N - 1] = __sycl_std::__invoke_##NAME(x[N - 1], y_vec[0]); \ + } \ + return res; \ + } + +__SYCL_MATH_FUNCTION_2_SGENFLOAT_Y_OVERLOAD(fmax) +// clang-format off +__SYCL_MATH_FUNCTION_2_SGENFLOAT_Y_OVERLOAD(fmin) + +#undef __SYCL_MATH_FUNCTION_2_SGENFLOAT_Y_OVERLOAD + +template +inline __SYCL_ALWAYS_INLINE + std::enable_if_t, marray> + ldexp(marray x, marray k) { + // clang-format on + marray res; + for (size_t i = 0; i < N; i++) { + res[i] = __sycl_std::__invoke_ldexp(x[i], k[i]); + } + return res; +} + +template +inline __SYCL_ALWAYS_INLINE + std::enable_if_t, marray> + ldexp(marray x, int k) { + marray res; + for (size_t i = 0; i < N; i++) { + res[i] = __sycl_std::__invoke_ldexp(x[i], k); + } + return res; +} + +#define __SYCL_MATH_FUNCTION_2_GENINT_Y_OVERLOAD_IMPL(NAME) \ + marray res; \ + for (size_t i = 0; i < N; i++) { \ + res[i] = __sycl_std::__invoke_##NAME(x[i], y[i]); \ + } \ + return res; + +template +inline __SYCL_ALWAYS_INLINE + std::enable_if_t, marray> + pown(marray x, marray y) { + __SYCL_MATH_FUNCTION_2_GENINT_Y_OVERLOAD_IMPL(pown) +} + +template +inline __SYCL_ALWAYS_INLINE + std::enable_if_t, marray> + rootn(marray x, marray y){ + __SYCL_MATH_FUNCTION_2_GENINT_Y_OVERLOAD_IMPL(rootn)} + +#undef __SYCL_MATH_FUNCTION_2_GENINT_Y_OVERLOAD_IMPL + +#define __SYCL_MATH_FUNCTION_2_INT_Y_OVERLOAD_IMPL(NAME) \ + marray res; \ + for (size_t i = 0; i < N; i++) { \ + res[i] = __sycl_std::__invoke_##NAME(x[i], y); \ + } \ + return res; + +template +inline __SYCL_ALWAYS_INLINE std::enable_if_t, + marray> pown(marray x, + int y) { + __SYCL_MATH_FUNCTION_2_INT_Y_OVERLOAD_IMPL(pown) +} + +template +inline __SYCL_ALWAYS_INLINE + std::enable_if_t, marray> + rootn(marray x, int y) { + __SYCL_MATH_FUNCTION_2_INT_Y_OVERLOAD_IMPL(rootn) +} + +#undef __SYCL_MATH_FUNCTION_2_INT_Y_OVERLOAD_IMPL + +#define __SYCL_MATH_FUNCTION_3_OVERLOAD(NAME) \ + template \ + inline __SYCL_ALWAYS_INLINE \ + std::enable_if_t, marray> \ + NAME(marray x, marray y, marray z) { \ + marray res; \ + for (size_t i = 0; i < N / 2; i++) { \ + auto partial_res = __sycl_std::__invoke_##NAME>( \ + detail::to_vec2(x, i * 2), detail::to_vec2(y, i * 2), \ + detail::to_vec2(z, i * 2)); \ + std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); \ + } \ + if (N % 2) { \ + res[N - 1] = \ + __sycl_std::__invoke_##NAME(x[N - 1], y[N - 1], z[N - 1]); \ + } \ + return res; \ + } + +__SYCL_MATH_FUNCTION_3_OVERLOAD(mad) +__SYCL_MATH_FUNCTION_3_OVERLOAD(mix) +__SYCL_MATH_FUNCTION_3_OVERLOAD(fma) + +#undef __SYCL_MATH_FUNCTION_3_OVERLOAD + +// svgenfloat fmax (svgenfloat x, sgenfloat y) +template +std::enable_if_t, T> +fmax(T x, typename T::element_type y) { + return __sycl_std::__invoke_fmax(x, T(y)); +} + +// svgenfloat fmin (svgenfloat x, sgenfloat y) +template +std::enable_if_t, T> +fmin(T x, typename T::element_type y) { + return __sycl_std::__invoke_fmin(x, T(y)); +} + +// vgenfloat ldexp (vgenfloat x, int k) +template +std::enable_if_t, T> ldexp(T x, int k) { + return __sycl_std::__invoke_ldexp(x, vec(k)); +} + +// vgenfloat ldexp (vgenfloat x, genint k) +template +std::enable_if_t && detail::is_intn_v, T> +ldexp(T x, T2 k) { + detail::check_vector_size(); + return __sycl_std::__invoke_ldexp(x, k); +} + +// other marray math functions + +// TODO: can be optimized in the way marray math functions above are optimized +// (usage of vec) +#define __SYCL_MARRAY_MATH_FUNCTION_W_GENPTR_ARG_OVERLOAD_IMPL(NAME, ARGPTR, \ + ...) \ + marray res; \ + for (int j = 0; j < N; j++) { \ + res[j] = \ + NAME(__VA_ARGS__, \ + address_space_cast>(&(*ARGPTR)[j])); \ + } \ + return res; + +#define __SYCL_MARRAY_MATH_FUNCTION_BINOP_2ND_ARG_GENFLOATPTR_OVERLOAD( \ + NAME, ARG1, ARG2, ...) \ + template \ + std::enable_if_t< \ + detail::is_svgenfloat_v && \ + detail::is_genfloatptr_marray_v, \ + marray> \ + NAME(marray ARG1, multi_ptr ARG2) { \ + __SYCL_MARRAY_MATH_FUNCTION_W_GENPTR_ARG_OVERLOAD_IMPL(NAME, ARG2, \ + __VA_ARGS__) \ + } + +__SYCL_MARRAY_MATH_FUNCTION_BINOP_2ND_ARG_GENFLOATPTR_OVERLOAD(fract, x, iptr, + x[j]) +__SYCL_MARRAY_MATH_FUNCTION_BINOP_2ND_ARG_GENFLOATPTR_OVERLOAD(modf, x, iptr, + x[j]) +__SYCL_MARRAY_MATH_FUNCTION_BINOP_2ND_ARG_GENFLOATPTR_OVERLOAD(sincos, x, + cosval, x[j]) + +#undef __SYCL_MARRAY_MATH_FUNCTION_BINOP_2ND_GENFLOATPTR_OVERLOAD + +#define __SYCL_MARRAY_MATH_FUNCTION_BINOP_2ND_ARG_GENINTPTR_OVERLOAD( \ + NAME, ARG1, ARG2, ...) \ + template \ + std::enable_if_t< \ + detail::is_svgenfloat_v && \ + detail::is_genintptr_marray_v, \ + marray> \ + NAME(marray ARG1, multi_ptr ARG2) { \ + __SYCL_MARRAY_MATH_FUNCTION_W_GENPTR_ARG_OVERLOAD_IMPL(NAME, ARG2, \ + __VA_ARGS__) \ + } + +__SYCL_MARRAY_MATH_FUNCTION_BINOP_2ND_ARG_GENINTPTR_OVERLOAD(frexp, x, exp, + x[j]) +__SYCL_MARRAY_MATH_FUNCTION_BINOP_2ND_ARG_GENINTPTR_OVERLOAD(lgamma_r, x, signp, + x[j]) + +#undef __SYCL_MARRAY_MATH_FUNCTION_BINOP_2ND_GENINTPTR_OVERLOAD + +#define __SYCL_MARRAY_MATH_FUNCTION_REMQUO_OVERLOAD(NAME, ...) \ + template \ + std::enable_if_t< \ + detail::is_svgenfloat_v && \ + detail::is_genintptr_marray_v, \ + marray> \ + NAME(marray x, marray y, \ + multi_ptr quo) { \ + __SYCL_MARRAY_MATH_FUNCTION_W_GENPTR_ARG_OVERLOAD_IMPL(NAME, quo, \ + __VA_ARGS__) \ + } + +__SYCL_MARRAY_MATH_FUNCTION_REMQUO_OVERLOAD(remquo, x[j], y[j]) + +#undef __SYCL_MARRAY_MATH_FUNCTION_REMQUO_OVERLOAD + +#undef __SYCL_MARRAY_MATH_FUNCTION_W_GENPTR_ARG_OVERLOAD_IMPL + +template +std::enable_if_t, marray, N>> +nan(marray nancode) { + marray, N> res; + for (int j = 0; j < N; j++) { + res[j] = nan(nancode[j]); + } + return res; +} + +/* --------------- 4.13.5 Common functions. ---------------------------------*/ +// vgenfloath clamp (vgenfloath x, half minval, half maxval) +// vgenfloatf clamp (vgenfloatf x, float minval, float maxval) +// vgenfloatd clamp (vgenfloatd x, double minval, double maxval) +template +std::enable_if_t, T> +clamp(T x, typename T::element_type minval, typename T::element_type maxval) { + return __sycl_std::__invoke_fclamp(x, T(minval), T(maxval)); +} + +// vgenfloatf max (vgenfloatf x, float y) +// vgenfloatd max (vgenfloatd x, double y) +// vgenfloath max (vgenfloath x, half y) +template +std::enable_if_t, T>(max)( + T x, typename T::element_type y) { + return __sycl_std::__invoke_fmax_common(x, T(y)); +} + +// vgenfloatf min (vgenfloatf x, float y) +// vgenfloatd min (vgenfloatd x, double y) +// vgenfloath min (vgenfloath x, half y) +template +std::enable_if_t, T>(min)( + T x, typename T::element_type y) { + return __sycl_std::__invoke_fmin_common(x, T(y)); +} + +// vgenfloatf mix (vgenfloatf x, vgenfloatf y, float a) +// vgenfloatd mix (vgenfloatd x, vgenfloatd y, double a) +// vgenfloatd mix (vgenfloath x, vgenfloath y, half a) +template +std::enable_if_t, T> mix(T x, T y, + typename T::element_type a) { + return __sycl_std::__invoke_mix(x, y, T(a)); +} + +// vgenfloatf step (float edge, vgenfloatf x) +// vgenfloatd step (double edge, vgenfloatd x) +// vgenfloatd step (half edge, vgenfloath x) +template +std::enable_if_t, T> +step(typename T::element_type edge, T x) { + return __sycl_std::__invoke_step(T(edge), x); +} + +// vgenfloatf smoothstep (float edge0, float edge1, vgenfloatf x) +// vgenfloatd smoothstep (double edge0, double edge1, vgenfloatd x) +// vgenfloath smoothstep (half edge0, half edge1, vgenfloath x) +template +std::enable_if_t, T> +smoothstep(typename T::element_type edge0, typename T::element_type edge1, + T x) { + return __sycl_std::__invoke_smoothstep(T(edge0), T(edge1), x); +} + +// marray common functions + +// TODO: can be optimized in the way math functions are optimized (usage of +// vec) +#define __SYCL_MARRAY_COMMON_FUNCTION_OVERLOAD_IMPL(NAME, ...) \ + T res; \ + for (int i = 0; i < T::size(); i++) { \ + res[i] = NAME(__VA_ARGS__); \ + } \ + return res; + +#define __SYCL_MARRAY_COMMON_FUNCTION_UNOP_OVERLOAD(NAME, ARG, ...) \ + template >> \ + T NAME(ARG) { \ + __SYCL_MARRAY_COMMON_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +__SYCL_MARRAY_COMMON_FUNCTION_UNOP_OVERLOAD(degrees, T radians, radians[i]) +__SYCL_MARRAY_COMMON_FUNCTION_UNOP_OVERLOAD(radians, T degrees, degrees[i]) +__SYCL_MARRAY_COMMON_FUNCTION_UNOP_OVERLOAD(sign, T x, x[i]) + +#undef __SYCL_MARRAY_COMMON_FUNCTION_UNOP_OVERLOAD + +#define __SYCL_MARRAY_COMMON_FUNCTION_BINOP_OVERLOAD(NAME, ARG1, ARG2, ...) \ + template >> \ + T NAME(ARG1, ARG2) { \ + __SYCL_MARRAY_COMMON_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +// min and max may be defined as macros, so we wrap them in parentheses to avoid +// errors. +__SYCL_MARRAY_COMMON_FUNCTION_BINOP_OVERLOAD((min), T x, T y, x[i], y[i]) +__SYCL_MARRAY_COMMON_FUNCTION_BINOP_OVERLOAD((min), T x, + detail::marray_element_t y, + x[i], y) +__SYCL_MARRAY_COMMON_FUNCTION_BINOP_OVERLOAD((max), T x, T y, x[i], y[i]) +__SYCL_MARRAY_COMMON_FUNCTION_BINOP_OVERLOAD((max), T x, + detail::marray_element_t y, + x[i], y) +__SYCL_MARRAY_COMMON_FUNCTION_BINOP_OVERLOAD(step, T edge, T x, edge[i], x[i]) +__SYCL_MARRAY_COMMON_FUNCTION_BINOP_OVERLOAD(step, + detail::marray_element_t edge, + T x, edge, x[i]) + +#undef __SYCL_MARRAY_COMMON_FUNCTION_BINOP_OVERLOAD + +#define __SYCL_MARRAY_COMMON_FUNCTION_TEROP_OVERLOAD(NAME, ARG1, ARG2, ARG3, \ + ...) \ + template >> \ + T NAME(ARG1, ARG2, ARG3) { \ + __SYCL_MARRAY_COMMON_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +__SYCL_MARRAY_COMMON_FUNCTION_TEROP_OVERLOAD(clamp, T x, T minval, T maxval, + x[i], minval[i], maxval[i]) +__SYCL_MARRAY_COMMON_FUNCTION_TEROP_OVERLOAD(clamp, T x, + detail::marray_element_t minval, + detail::marray_element_t maxval, + x[i], minval, maxval) +__SYCL_MARRAY_COMMON_FUNCTION_TEROP_OVERLOAD(mix, T x, T y, T a, x[i], y[i], + a[i]) +__SYCL_MARRAY_COMMON_FUNCTION_TEROP_OVERLOAD(mix, T x, T y, + detail::marray_element_t a, + x[i], y[i], a) +__SYCL_MARRAY_COMMON_FUNCTION_TEROP_OVERLOAD(smoothstep, T edge0, T edge1, T x, + edge0[i], edge1[i], x[i]) +__SYCL_MARRAY_COMMON_FUNCTION_TEROP_OVERLOAD(smoothstep, + detail::marray_element_t edge0, + detail::marray_element_t edge1, + T x, edge0, edge1, x[i]) + +#undef __SYCL_MARRAY_COMMON_FUNCTION_TEROP_OVERLOAD +#undef __SYCL_MARRAY_COMMON_FUNCTION_OVERLOAD_IMPL + +/* --------------- 4.13.4 Integer functions. --------------------------------*/ +// igeninteger abs (geninteger x) +template +std::enable_if_t, T> abs(T x) { + auto res = __sycl_std::__invoke_s_abs>(x); + if constexpr (detail::is_vigeninteger_v) { + return res.template convert>(); + } else + return detail::make_signed_t(res); +} + +// geninteger clamp (geninteger x, sgeninteger minval, sgeninteger maxval) +template +std::enable_if_t, T> +clamp(T x, typename T::element_type minval, typename T::element_type maxval) { + return __sycl_std::__invoke_s_clamp(x, T(minval), T(maxval)); +} + +// geninteger clamp (geninteger x, sgeninteger minval, sgeninteger maxval) +template +std::enable_if_t, T> +clamp(T x, typename T::element_type minval, typename T::element_type maxval) { + return __sycl_std::__invoke_u_clamp(x, T(minval), T(maxval)); +} + +// igeninteger max (vigeninteger x, sigeninteger y) +template +std::enable_if_t, T>(max)( + T x, typename T::element_type y) { + return __sycl_std::__invoke_s_max(x, T(y)); +} + +// vugeninteger max (vugeninteger x, sugeninteger y) +template +std::enable_if_t, T>(max)( + T x, typename T::element_type y) { + return __sycl_std::__invoke_u_max(x, T(y)); +} + +// vigeninteger min (vigeninteger x, sigeninteger y) +template +std::enable_if_t, T>(min)( + T x, typename T::element_type y) { + return __sycl_std::__invoke_s_min(x, T(y)); +} + +// vugeninteger min (vugeninteger x, sugeninteger y) +template +std::enable_if_t, T>(min)( + T x, typename T::element_type y) { + return __sycl_std::__invoke_u_min(x, T(y)); +} + +// marray integer functions + +// TODO: can be optimized in the way math functions are optimized (usage of +// vec) +#define __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, ...) \ + marray res; \ + for (int j = 0; j < N; j++) { \ + res[j] = NAME(__VA_ARGS__); \ + } \ + return res; + +// Keep NAME for readability +#define __SYCL_MARRAY_INTEGER_FUNCTION_ABS_U_OVERLOAD(NAME, ARG, ...) \ + template \ + std::enable_if_t, marray> NAME( \ + marray ARG) { \ + __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +#define __SYCL_MARRAY_INTEGER_FUNCTION_ABS_I_OVERLOAD(NAME, ARG, ...) \ + template \ + std::enable_if_t, marray> NAME( \ + marray ARG) { \ + __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +__SYCL_MARRAY_INTEGER_FUNCTION_ABS_U_OVERLOAD(abs, x, x[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_ABS_I_OVERLOAD(abs, x, x[j]) + +#undef __SYCL_MARRAY_INTEGER_FUNCTION_ABS_I_OVERLOAD +#undef __SYCL_MARRAY_INTEGER_FUNCTION_ABS_U_OVERLOAD + +#define __SYCL_MARRAY_INTEGER_FUNCTION_UNOP_OVERLOAD(NAME, ARG, ...) \ + template \ + std::enable_if_t, marray> NAME( \ + marray ARG) { \ + __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +__SYCL_MARRAY_INTEGER_FUNCTION_UNOP_OVERLOAD(clz, x, x[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_UNOP_OVERLOAD(ctz, x, x[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_UNOP_OVERLOAD(popcount, x, x[j]) + +#undef __SYCL_MARRAY_INTEGER_FUNCTION_UNOP_OVERLOAD + +#define __SYCL_MARRAY_INTEGER_FUNCTION_BINOP_U_OVERLOAD(NAME, ARG1, ARG2, ...) \ + template \ + std::enable_if_t, marray> NAME( \ + marray ARG1, marray ARG2) { \ + __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +#define __SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_RET_U_OVERLOAD(NAME, ARG1, \ + ARG2, ...) \ + template \ + std::enable_if_t, \ + marray, N>> \ + NAME(marray ARG1, marray ARG2) { \ + __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +#define __SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_OVERLOAD(NAME, ARG1, ARG2, ...) \ + template \ + std::enable_if_t, marray> NAME( \ + marray ARG1, marray ARG2) { \ + __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +#define __SYCL_MARRAY_INTEGER_FUNCTION_BINOP_U_2ND_ARG_SCALAR_OVERLOAD( \ + NAME, ARG1, ARG2, ...) \ + template \ + std::enable_if_t, marray> NAME( \ + marray ARG1, T ARG2) { \ + __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +#define __SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_2ND_ARG_SCALAR_OVERLOAD( \ + NAME, ARG1, ARG2, ...) \ + template \ + std::enable_if_t, marray> NAME( \ + marray ARG1, T ARG2) { \ + __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_U_OVERLOAD(abs_diff, x, y, x[j], y[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_RET_U_OVERLOAD(abs_diff, x, y, x[j], + y[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_U_OVERLOAD(add_sat, x, y, x[j], y[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_OVERLOAD(add_sat, x, y, x[j], y[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_U_OVERLOAD(hadd, x, y, x[j], y[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_OVERLOAD(hadd, x, y, x[j], y[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_U_OVERLOAD(rhadd, x, y, x[j], y[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_OVERLOAD(rhadd, x, y, x[j], y[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_U_OVERLOAD((max), x, y, x[j], y[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_OVERLOAD((max), x, y, x[j], y[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_U_2ND_ARG_SCALAR_OVERLOAD((max), x, y, + x[j], y) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_2ND_ARG_SCALAR_OVERLOAD((max), x, y, + x[j], y) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_U_OVERLOAD((min), x, y, x[j], y[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_OVERLOAD((min), x, y, x[j], y[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_U_2ND_ARG_SCALAR_OVERLOAD((min), x, y, + x[j], y) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_2ND_ARG_SCALAR_OVERLOAD((min), x, y, + x[j], y) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_U_OVERLOAD(mul_hi, x, y, x[j], y[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_OVERLOAD(mul_hi, x, y, x[j], y[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_U_OVERLOAD(rotate, v, i, v[j], i[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_OVERLOAD(rotate, v, i, v[j], i[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_U_OVERLOAD(sub_sat, x, y, x[j], y[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_OVERLOAD(sub_sat, x, y, x[j], y[j]) + +#undef __SYCL_MARRAY_INTEGER_FUNCTION_BINOP_U_2ND_ARG_SCALAR_OVERLOAD +#undef __SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_2ND_ARG_SCALAR_OVERLOAD +#undef __SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_OVERLOAD +#undef __SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_RET_U_OVERLOAD +#undef __SYCL_MARRAY_INTEGER_FUNCTION_BINOP_U_OVERLOAD + +#define __SYCL_MARRAY_INTEGER_FUNCTION_TEROP_U_OVERLOAD(NAME, ARG1, ARG2, \ + ARG3, ...) \ + template \ + std::enable_if_t, marray> NAME( \ + marray ARG1, marray ARG2, marray ARG3) { \ + __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +#define __SYCL_MARRAY_INTEGER_FUNCTION_TEROP_I_OVERLOAD(NAME, ARG1, ARG2, \ + ARG3, ...) \ + template \ + std::enable_if_t, marray> NAME( \ + marray ARG1, marray ARG2, marray ARG3) { \ + __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +#define __SYCL_MARRAY_INTEGER_FUNCTION_TEROP_U_2ND_3RD_ARGS_SCALAR_OVERLOAD( \ + NAME, ARG1, ARG2, ARG3, ...) \ + template \ + std::enable_if_t, marray> NAME( \ + marray ARG1, T ARG2, T ARG3) { \ + __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +#define __SYCL_MARRAY_INTEGER_FUNCTION_TEROP_I_2ND_3RD_ARGS_SCALAR_OVERLOAD( \ + NAME, ARG1, ARG2, ARG3, ...) \ + template \ + std::enable_if_t, marray> NAME( \ + marray ARG1, T ARG2, T ARG3) { \ + __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +__SYCL_MARRAY_INTEGER_FUNCTION_TEROP_U_OVERLOAD(clamp, x, minval, maxval, x[j], + minval[j], maxval[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_TEROP_I_OVERLOAD(clamp, x, minval, maxval, x[j], + minval[j], maxval[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_TEROP_U_2ND_3RD_ARGS_SCALAR_OVERLOAD( + clamp, x, minval, maxval, x[j], minval, maxval) +__SYCL_MARRAY_INTEGER_FUNCTION_TEROP_I_2ND_3RD_ARGS_SCALAR_OVERLOAD( + clamp, x, minval, maxval, x[j], minval, maxval) +__SYCL_MARRAY_INTEGER_FUNCTION_TEROP_U_OVERLOAD(mad_hi, a, b, c, a[j], b[j], + c[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_TEROP_I_OVERLOAD(mad_hi, a, b, c, a[j], b[j], + c[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_TEROP_U_OVERLOAD(mad_sat, a, b, c, a[j], b[j], + c[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_TEROP_I_OVERLOAD(mad_sat, a, b, c, a[j], b[j], + c[j]) + +#undef __SYCL_MARRAY_INTEGER_FUNCTION_TEROP_U_2ND_3RD_ARGS_SCALAR_OVERLOAD +#undef __SYCL_MARRAY_INTEGER_FUNCTION_TEROP_I_2ND_3RD_ARGS_SCALAR_OVERLOAD +#undef __SYCL_MARRAY_INTEGER_FUNCTION_TEROP_I_OVERLOAD +#undef __SYCL_MARRAY_INTEGER_FUNCTION_TEROP_U_OVERLOAD + +// Keep NAME for readability +#define __SYCL_MARRAY_INTEGER_FUNCTION_MAD24_U_OVERLOAD(NAME, ARG1, ARG2, \ + ARG3, ...) \ + template \ + std::enable_if_t, marray> NAME( \ + marray ARG1, marray ARG2, marray ARG3) { \ + __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +#define __SYCL_MARRAY_INTEGER_FUNCTION_MAD24_I_OVERLOAD(NAME, ARG1, ARG2, \ + ARG3, ...) \ + template \ + std::enable_if_t, marray> NAME( \ + marray ARG1, marray ARG2, marray ARG3) { \ + __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +__SYCL_MARRAY_INTEGER_FUNCTION_MAD24_U_OVERLOAD(mad24, x, y, z, x[j], y[j], + z[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_MAD24_I_OVERLOAD(mad24, x, y, z, x[j], y[j], + z[j]) + +#undef __SYCL_MARRAY_INTEGER_FUNCTION_MAD24_I_OVERLOAD +#undef __SYCL_MARRAY_INTEGER_FUNCTION_MAD24_U_OVERLOAD + +// Keep NAME for readability +#define __SYCL_MARRAY_INTEGER_FUNCTION_MUL24_U_OVERLOAD(NAME, ARG1, ARG2, ...) \ + template \ + std::enable_if_t, marray> NAME( \ + marray ARG1, marray ARG2) { \ + __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +#define __SYCL_MARRAY_INTEGER_FUNCTION_MUL24_I_OVERLOAD(NAME, ARG1, ARG2, ...) \ + template \ + std::enable_if_t, marray> NAME( \ + marray ARG1, marray ARG2) { \ + __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +__SYCL_MARRAY_INTEGER_FUNCTION_MUL24_U_OVERLOAD(mul24, x, y, x[j], y[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_MUL24_I_OVERLOAD(mul24, x, y, x[j], y[j]) + +#undef __SYCL_MARRAY_INTEGER_FUNCTION_MUL24_I_OVERLOAD +#undef __SYCL_MARRAY_INTEGER_FUNCTION_MUL24_U_OVERLOAD +#undef __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL + +// TODO: can be optimized in the way math functions are optimized (usage of +// vec) +#define __SYCL_MARRAY_INTEGER_FUNCTION_UPSAMPLE_OVERLOAD_IMPL(NAME) \ + detail::make_larger_t> res; \ + for (int j = 0; j < N; j++) { \ + res[j] = NAME(hi[j], lo[j]); \ + } \ + return res; + +// Keep NAME for readability +#define __SYCL_MARRAY_INTEGER_FUNCTION_UPSAMPLE_UU_OVERLOAD(NAME, KBIT) \ + template \ + std::enable_if_t, \ + detail::make_larger_t>> \ + NAME(marray hi, marray lo) { \ + __SYCL_MARRAY_INTEGER_FUNCTION_UPSAMPLE_OVERLOAD_IMPL(NAME) \ + } + +#define __SYCL_MARRAY_INTEGER_FUNCTION_UPSAMPLE_IU_OVERLOAD(NAME, KBIT) \ + template \ + std::enable_if_t && \ + detail::is_ugeninteger##KBIT##_v, \ + detail::make_larger_t>> \ + NAME(marray hi, marray lo) { \ + __SYCL_MARRAY_INTEGER_FUNCTION_UPSAMPLE_OVERLOAD_IMPL(NAME) \ + } + +__SYCL_MARRAY_INTEGER_FUNCTION_UPSAMPLE_UU_OVERLOAD(upsample, 8bit) +__SYCL_MARRAY_INTEGER_FUNCTION_UPSAMPLE_IU_OVERLOAD(upsample, 8bit) +__SYCL_MARRAY_INTEGER_FUNCTION_UPSAMPLE_UU_OVERLOAD(upsample, 16bit) +__SYCL_MARRAY_INTEGER_FUNCTION_UPSAMPLE_IU_OVERLOAD(upsample, 16bit) +__SYCL_MARRAY_INTEGER_FUNCTION_UPSAMPLE_UU_OVERLOAD(upsample, 32bit) +__SYCL_MARRAY_INTEGER_FUNCTION_UPSAMPLE_IU_OVERLOAD(upsample, 32bit) + +#undef __SYCL_MARRAY_INTEGER_FUNCTION_UPSAMPLE_IU_OVERLOAD +#undef __SYCL_MARRAY_INTEGER_FUNCTION_UPSAMPLE_UU_OVERLOAD +#undef __SYCL_MARRAY_INTEGER_FUNCTION_UPSAMPLE_OVERLOAD_IMPL + +/* --------------- 4.13.6 Geometric Functions. ------------------------------*/ +// float3 cross (float3 p0, float3 p1) +// float4 cross (float4 p0, float4 p1) +// double3 cross (double3 p0, double3 p1) +// double4 cross (double4 p0, double4 p1) +// half3 cross (half3 p0, half3 p1) +// half4 cross (half4 p0, half4 p1) +template +std::enable_if_t, T> cross(T p0, T p1) { + return __sycl_std::__invoke_cross(p0, p1); +} + +// float dot (vgengeofloat p0, vgengeofloat p1) +template +std::enable_if_t, float> dot(T p0, T p1) { + return __sycl_std::__invoke_Dot(p0, p1); +} + +// double dot (vgengeodouble p0, vgengeodouble p1) +template +std::enable_if_t, double> dot(T p0, T p1) { + return __sycl_std::__invoke_Dot(p0, p1); +} + +// half dot (vgengeohalf p0, vgengeohalf p1) +template +std::enable_if_t, half> dot(T p0, T p1) { + return __sycl_std::__invoke_Dot(p0, p1); +} + +// float distance (gengeofloat p0, gengeofloat p1) +template , T>> +float distance(T p0, T p1) { + return __sycl_std::__invoke_distance(p0, p1); +} + +// double distance (gengeodouble p0, gengeodouble p1) +template , T>> +double distance(T p0, T p1) { + return __sycl_std::__invoke_distance(p0, p1); +} + +// half distance (gengeohalf p0, gengeohalf p1) +template , T>> +half distance(T p0, T p1) { + return __sycl_std::__invoke_distance(p0, p1); +} + +// float length (gengeofloat p) +template , T>> +float length(T p) { + return __sycl_std::__invoke_length(p); +} + +// double length (gengeodouble p) +template , T>> +double length(T p) { + return __sycl_std::__invoke_length(p); +} + +// half length (gengeohalf p) +template , T>> +half length(T p) { + return __sycl_std::__invoke_length(p); +} + +// gengeofloat normalize (gengeofloat p) +template +std::enable_if_t, T> normalize(T p) { + return __sycl_std::__invoke_normalize(p); +} + +// gengeodouble normalize (gengeodouble p) +template +std::enable_if_t, T> normalize(T p) { + return __sycl_std::__invoke_normalize(p); +} + +// gengeohalf normalize (gengeohalf p) +template +std::enable_if_t, T> normalize(T p) { + return __sycl_std::__invoke_normalize(p); +} + +// float fast_distance (gengeofloat p0, gengeofloat p1) +template , T>> +float fast_distance(T p0, T p1) { + return __sycl_std::__invoke_fast_distance(p0, p1); +} + +// double fast_distance (gengeodouble p0, gengeodouble p1) +template , T>> +double fast_distance(T p0, T p1) { + return __sycl_std::__invoke_fast_distance(p0, p1); +} + +// float fast_length (gengeofloat p) +template , T>> +float fast_length(T p) { + return __sycl_std::__invoke_fast_length(p); +} + +// double fast_length (gengeodouble p) +template , T>> +double fast_length(T p) { + return __sycl_std::__invoke_fast_length(p); +} + +// gengeofloat fast_normalize (gengeofloat p) +template +std::enable_if_t, T> fast_normalize(T p) { + return __sycl_std::__invoke_fast_normalize(p); +} + +// gengeodouble fast_normalize (gengeodouble p) +template +std::enable_if_t, T> fast_normalize(T p) { + return __sycl_std::__invoke_fast_normalize(p); +} + +// marray geometric functions + +#define __SYCL_MARRAY_GEOMETRIC_FUNCTION_OVERLOAD_IMPL(NAME, ...) \ + vec, T::size()> result_v; \ + result_v = NAME(__VA_ARGS__); \ + return detail::to_marray(result_v); + +template +std::enable_if_t, T> cross(T p0, T p1) { + __SYCL_MARRAY_GEOMETRIC_FUNCTION_OVERLOAD_IMPL(cross, detail::to_vec(p0), + detail::to_vec(p1)) +} + +template +std::enable_if_t, T> normalize(T p) { + __SYCL_MARRAY_GEOMETRIC_FUNCTION_OVERLOAD_IMPL(normalize, detail::to_vec(p)) +} + +template +std::enable_if_t, T> fast_normalize(T p) { + __SYCL_MARRAY_GEOMETRIC_FUNCTION_OVERLOAD_IMPL(fast_normalize, + detail::to_vec(p)) +} + +#undef __SYCL_MARRAY_GEOMETRIC_FUNCTION_OVERLOAD_IMPL + +#define __SYCL_MARRAY_GEOMETRIC_FUNCTION_IS_GENGEOMARRAY_BINOP_OVERLOAD(NAME) \ + template \ + std::enable_if_t, detail::marray_element_t> \ + NAME(T p0, T p1) { \ + return NAME(detail::to_vec(p0), detail::to_vec(p1)); \ + } + +// clang-format off +__SYCL_MARRAY_GEOMETRIC_FUNCTION_IS_GENGEOMARRAY_BINOP_OVERLOAD(dot) +__SYCL_MARRAY_GEOMETRIC_FUNCTION_IS_GENGEOMARRAY_BINOP_OVERLOAD(distance) +// clang-format on + +#undef __SYCL_MARRAY_GEOMETRIC_FUNCTION_IS_GENGEOMARRAY_BINOP_OVERLOAD + +template +std::enable_if_t, detail::marray_element_t> +length(T p) { + return __sycl_std::__invoke_length>( + detail::to_vec(p)); +} + +template +std::enable_if_t, detail::marray_element_t> +fast_distance(T p0, T p1) { + return fast_distance(detail::to_vec(p0), detail::to_vec(p1)); +} + +template +std::enable_if_t, detail::marray_element_t> +fast_length(T p) { + return fast_length(detail::to_vec(p)); +} + +/* SYCL 1.2.1 ---- 4.13.7 Relational functions. -----------------------------*/ +/* SYCL 2020 ---- 4.17.9 Relational functions. -----------------------------*/ + +// marray relational functions + +#define __SYCL_MARRAY_RELATIONAL_FUNCTION_BINOP_OVERLOAD(NAME) \ + template >> \ + sycl::marray NAME(T x, T y) { \ + sycl::marray res; \ + for (int i = 0; i < x.size(); i++) { \ + res[i] = NAME(x[i], y[i]); \ + } \ + return res; \ + } + +#define __SYCL_MARRAY_RELATIONAL_FUNCTION_UNOP_OVERLOAD(NAME) \ + template >> \ + sycl::marray NAME(T x) { \ + sycl::marray res; \ + for (int i = 0; i < x.size(); i++) { \ + res[i] = NAME(x[i]); \ + } \ + return res; \ + } + +__SYCL_MARRAY_RELATIONAL_FUNCTION_BINOP_OVERLOAD(isequal) +__SYCL_MARRAY_RELATIONAL_FUNCTION_BINOP_OVERLOAD(isnotequal) +__SYCL_MARRAY_RELATIONAL_FUNCTION_BINOP_OVERLOAD(isgreater) +__SYCL_MARRAY_RELATIONAL_FUNCTION_BINOP_OVERLOAD(isgreaterequal) +__SYCL_MARRAY_RELATIONAL_FUNCTION_BINOP_OVERLOAD(isless) +__SYCL_MARRAY_RELATIONAL_FUNCTION_BINOP_OVERLOAD(islessequal) +__SYCL_MARRAY_RELATIONAL_FUNCTION_BINOP_OVERLOAD(islessgreater) +__SYCL_MARRAY_RELATIONAL_FUNCTION_UNOP_OVERLOAD(isfinite) +__SYCL_MARRAY_RELATIONAL_FUNCTION_UNOP_OVERLOAD(isinf) +__SYCL_MARRAY_RELATIONAL_FUNCTION_UNOP_OVERLOAD(isnan) +__SYCL_MARRAY_RELATIONAL_FUNCTION_UNOP_OVERLOAD(isnormal) +__SYCL_MARRAY_RELATIONAL_FUNCTION_BINOP_OVERLOAD(isordered) +__SYCL_MARRAY_RELATIONAL_FUNCTION_BINOP_OVERLOAD(isunordered) +__SYCL_MARRAY_RELATIONAL_FUNCTION_UNOP_OVERLOAD(signbit) + +// int any (vigeninteger x) +template +std::enable_if_t, int> any(T x) { + return detail::rel_sign_bit_test_ret_t( + __sycl_std::__invoke_Any>( + detail::rel_sign_bit_test_arg_t(x))); +} + +// int all (vigeninteger x) +template +std::enable_if_t, int> all(T x) { + return detail::rel_sign_bit_test_ret_t( + __sycl_std::__invoke_All>( + detail::rel_sign_bit_test_arg_t(x))); +} + +// other marray relational functions + +template +std::enable_if_t, bool> any(marray x) { + return std::any_of(x.begin(), x.end(), [](T i) { return any(i); }); +} + +template +std::enable_if_t, bool> all(marray x) { + return std::all_of(x.begin(), x.end(), [](T i) { return all(i); }); +} + +template +std::enable_if_t, marray> +bitselect(marray a, marray b, marray c) { + marray res; + for (int i = 0; i < N; i++) { + res[i] = bitselect(a[i], b[i], c[i]); + } + return res; +} + +template +std::enable_if_t, marray> +select(marray a, marray b, marray c) { + marray res; + for (int i = 0; i < N; i++) { + res[i] = select(a[i], b[i], c[i]); + } + return res; +} + +namespace native { +/* ----------------- 4.13.3 Math functions. ---------------------------------*/ + +#define __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(NAME) \ + template \ + inline __SYCL_ALWAYS_INLINE marray NAME(marray x) { \ + marray res; \ + for (size_t i = 0; i < N / 2; i++) { \ + auto partial_res = __sycl_std::__invoke_native_##NAME>( \ + detail::to_vec2(x, i * 2)); \ + std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); \ + } \ + if (N % 2) { \ + res[N - 1] = __sycl_std::__invoke_native_##NAME(x[N - 1]); \ + } \ + return res; \ + } + +__SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(sin) +__SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(cos) +__SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(tan) +__SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(exp) +__SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(exp2) +__SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(exp10) +__SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(log) +__SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(log2) +__SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(log10) +__SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(sqrt) +__SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(rsqrt) +__SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(recip) + +#undef __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD + +#define __SYCL_NATIVE_MATH_FUNCTION_2_OVERLOAD(NAME) \ + template \ + inline __SYCL_ALWAYS_INLINE marray NAME(marray x, \ + marray y) { \ + marray res; \ + for (size_t i = 0; i < N / 2; i++) { \ + auto partial_res = __sycl_std::__invoke_native_##NAME>( \ + detail::to_vec2(x, i * 2), detail::to_vec2(y, i * 2)); \ + std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); \ + } \ + if (N % 2) { \ + res[N - 1] = \ + __sycl_std::__invoke_native_##NAME(x[N - 1], y[N - 1]); \ + } \ + return res; \ + } + +__SYCL_NATIVE_MATH_FUNCTION_2_OVERLOAD(divide) +__SYCL_NATIVE_MATH_FUNCTION_2_OVERLOAD(powr) + +#undef __SYCL_NATIVE_MATH_FUNCTION_2_OVERLOAD + +} // namespace native +namespace half_precision { +/* ----------------- 4.13.3 Math functions. ---------------------------------*/ +#define __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(NAME) \ + template \ + inline __SYCL_ALWAYS_INLINE marray NAME(marray x) { \ + marray res; \ + for (size_t i = 0; i < N / 2; i++) { \ + auto partial_res = __sycl_std::__invoke_half_##NAME>( \ + detail::to_vec2(x, i * 2)); \ + std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); \ + } \ + if (N % 2) { \ + res[N - 1] = __sycl_std::__invoke_half_##NAME(x[N - 1]); \ + } \ + return res; \ + } + +__SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(sin) +__SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(cos) +__SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(tan) +__SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(exp) +__SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(exp2) +__SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(exp10) +__SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(log) +__SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(log2) +__SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(log10) +__SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(sqrt) +__SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(rsqrt) +__SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(recip) + +#undef __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD + +#define __SYCL_HALF_PRECISION_MATH_FUNCTION_2_OVERLOAD(NAME) \ + template \ + inline __SYCL_ALWAYS_INLINE marray NAME(marray x, \ + marray y) { \ + marray res; \ + for (size_t i = 0; i < N / 2; i++) { \ + auto partial_res = __sycl_std::__invoke_half_##NAME>( \ + detail::to_vec2(x, i * 2), detail::to_vec2(y, i * 2)); \ + std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); \ + } \ + if (N % 2) { \ + res[N - 1] = \ + __sycl_std::__invoke_half_##NAME(x[N - 1], y[N - 1]); \ + } \ + return res; \ + } + +__SYCL_HALF_PRECISION_MATH_FUNCTION_2_OVERLOAD(divide) +__SYCL_HALF_PRECISION_MATH_FUNCTION_2_OVERLOAD(powr) + +#undef __SYCL_HALF_PRECISION_MATH_FUNCTION_2_OVERLOAD + +} // namespace half_precision + +#ifdef __FAST_MATH__ +/* ----------------- -ffast-math functions. ---------------------------------*/ + +#define __SYCL_MATH_FUNCTION_OVERLOAD_FM(NAME) \ + template \ + inline __SYCL_ALWAYS_INLINE \ + std::enable_if_t, marray> \ + NAME(marray x) { \ + return native::NAME(x); \ + } + +__SYCL_MATH_FUNCTION_OVERLOAD_FM(sin) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(cos) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(tan) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(exp) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(exp2) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(exp10) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(log) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(log2) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(log10) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(sqrt) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(rsqrt) +#undef __SYCL_MATH_FUNCTION_OVERLOAD_FM + +template +inline __SYCL_ALWAYS_INLINE + std::enable_if_t, marray> + powr(marray x, marray y) { + return native::powr(x, y); +} + +#endif // __FAST_MATH__ +} // namespace _V1 +} // namespace sycl diff --git a/sycl/include/sycl/builtins_legacy_scalar.hpp b/sycl/include/sycl/builtins_legacy_scalar.hpp new file mode 100644 index 0000000000000..18141b0261871 --- /dev/null +++ b/sycl/include/sycl/builtins_legacy_scalar.hpp @@ -0,0 +1,1256 @@ +//==--- builtins_legacy_scalar.hpp - Old SYCL built-in scalar definitions --==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES +#error "Legacy builtins must not be used in preview." +#endif + +#pragma once + +#include // for address_space, decorated +#include // for half +#include // for Boolean +#include // for __invoke_select, __in... +#include // for __SYCL_ALWAYS_INLINE +#include // for is_svgenfloat, is_sge... +#include // for is_contained, type_list +#include // for half, intel +#include // for address_space_cast + +namespace sycl { +inline namespace _V1 { + +#ifdef __SYCL_DEVICE_ONLY__ +#define __sycl_std +#else +namespace __sycl_std = __host_std; +#endif + +#ifdef __FAST_MATH__ +#define __FAST_MATH_GENFLOAT(T) \ + (detail::is_svgenfloatd_v || detail::is_svgenfloath_v) +#define __FAST_MATH_SGENFLOAT(T) \ + (std::is_same_v || std::is_same_v) +#else +#define __FAST_MATH_GENFLOAT(T) (detail::is_svgenfloat_v) +#define __FAST_MATH_SGENFLOAT(T) (detail::is_sgenfloat_v) +#endif + +/* ------------------ 4.13.3 Math functions. ---------------------------------*/ + +// svgenfloat acos (svgenfloat x) +template +std::enable_if_t, T> acos(T x) { + return __sycl_std::__invoke_acos(x); +} + +// svgenfloat acosh (svgenfloat x) +template +std::enable_if_t, T> acosh(T x) { + return __sycl_std::__invoke_acosh(x); +} + +// svgenfloat acospi (svgenfloat x) +template +std::enable_if_t, T> acospi(T x) { + return __sycl_std::__invoke_acospi(x); +} + +// svgenfloat asin (svgenfloat x) +template +std::enable_if_t, T> asin(T x) { + return __sycl_std::__invoke_asin(x); +} + +// svgenfloat asinh (svgenfloat x) +template +std::enable_if_t, T> asinh(T x) { + return __sycl_std::__invoke_asinh(x); +} + +// svgenfloat asinpi (svgenfloat x) +template +std::enable_if_t, T> asinpi(T x) { + return __sycl_std::__invoke_asinpi(x); +} + +// svgenfloat atan (svgenfloat y_over_x) +template +std::enable_if_t, T> atan(T y_over_x) { + return __sycl_std::__invoke_atan(y_over_x); +} + +// svgenfloat atan2 (svgenfloat y, svgenfloat x) +template +std::enable_if_t, T> atan2(T y, T x) { + return __sycl_std::__invoke_atan2(y, x); +} + +// svgenfloat atanh (svgenfloat x) +template +std::enable_if_t, T> atanh(T x) { + return __sycl_std::__invoke_atanh(x); +} + +// svgenfloat atanpi (svgenfloat x) +template +std::enable_if_t, T> atanpi(T x) { + return __sycl_std::__invoke_atanpi(x); +} + +// svgenfloat atan2pi (svgenfloat y, svgenfloat x) +template +std::enable_if_t, T> atan2pi(T y, T x) { + return __sycl_std::__invoke_atan2pi(y, x); +} + +// svgenfloat cbrt (svgenfloat x) +template +std::enable_if_t, T> cbrt(T x) { + return __sycl_std::__invoke_cbrt(x); +} + +// svgenfloat ceil (svgenfloat x) +template +std::enable_if_t, T> ceil(T x) { + return __sycl_std::__invoke_ceil(x); +} + +// svgenfloat copysign (svgenfloat x, svgenfloat y) +template +std::enable_if_t, T> copysign(T x, T y) { + return __sycl_std::__invoke_copysign(x, y); +} + +// svgenfloat cos (svgenfloat x) +template std::enable_if_t<__FAST_MATH_GENFLOAT(T), T> cos(T x) { + return __sycl_std::__invoke_cos(x); +} + +// svgenfloat cosh (svgenfloat x) +template +std::enable_if_t, T> cosh(T x) { + return __sycl_std::__invoke_cosh(x); +} + +// svgenfloat cospi (svgenfloat x) +template +std::enable_if_t, T> cospi(T x) { + return __sycl_std::__invoke_cospi(x); +} + +// svgenfloat erfc (svgenfloat x) +template +std::enable_if_t, T> erfc(T x) { + return __sycl_std::__invoke_erfc(x); +} + +// svgenfloat erf (svgenfloat x) +template std::enable_if_t, T> erf(T x) { + return __sycl_std::__invoke_erf(x); +} + +// svgenfloat exp (svgenfloat x ) +template std::enable_if_t<__FAST_MATH_GENFLOAT(T), T> exp(T x) { + return __sycl_std::__invoke_exp(x); +} + +// svgenfloat exp2 (svgenfloat x) +template std::enable_if_t<__FAST_MATH_GENFLOAT(T), T> exp2(T x) { + return __sycl_std::__invoke_exp2(x); +} + +// svgenfloat exp10 (svgenfloat x) +template std::enable_if_t<__FAST_MATH_GENFLOAT(T), T> exp10(T x) { + return __sycl_std::__invoke_exp10(x); +} + +// svgenfloat expm1 (svgenfloat x) +template +std::enable_if_t, T> expm1(T x) { + return __sycl_std::__invoke_expm1(x); +} + +// svgenfloat fabs (svgenfloat x) +template +std::enable_if_t, T> fabs(T x) { + return __sycl_std::__invoke_fabs(x); +} + +// svgenfloat fdim (svgenfloat x, svgenfloat y) +template +std::enable_if_t, T> fdim(T x, T y) { + return __sycl_std::__invoke_fdim(x, y); +} + +// svgenfloat floor (svgenfloat x) +template +std::enable_if_t, T> floor(T x) { + return __sycl_std::__invoke_floor(x); +} + +// svgenfloat fma (svgenfloat a, svgenfloat b, svgenfloat c) +template +std::enable_if_t, T> fma(T a, T b, T c) { + return __sycl_std::__invoke_fma(a, b, c); +} + +// svgenfloat fmax (svgenfloat x, svgenfloat y) +template +std::enable_if_t, T> fmax(T x, T y) { + return __sycl_std::__invoke_fmax(x, y); +} + +// svgenfloat fmin (svgenfloat x, svgenfloat y) +template +std::enable_if_t, T> fmin(T x, T y) { + return __sycl_std::__invoke_fmin(x, y); +} + +// svgenfloat fmod (svgenfloat x, svgenfloat y) +template +std::enable_if_t, T> fmod(T x, T y) { + return __sycl_std::__invoke_fmod(x, y); +} + +// svgenfloat fract (svgenfloat x, genfloatptr iptr) +template +std::enable_if_t && detail::is_genfloatptr_v, T> +fract(T x, T2 iptr) { + detail::check_vector_size(); + return __sycl_std::__invoke_fract(x, iptr); +} + +// svgenfloat frexp (svgenfloat x, genintptr exp) +template +std::enable_if_t && detail::is_genintptr_v, T> +frexp(T x, T2 exp) { + detail::check_vector_size(); + return __sycl_std::__invoke_frexp(x, exp); +} + +// svgenfloat hypot (svgenfloat x, svgenfloat y) +template +std::enable_if_t, T> hypot(T x, T y) { + return __sycl_std::__invoke_hypot(x, y); +} + +// genint ilogb (svgenfloat x) +template , T>> +detail::change_base_type_t ilogb(T x) { + return __sycl_std::__invoke_ilogb>(x); +} + +// float ldexp (float x, int k) +// double ldexp (double x, int k) +// half ldexp (half x, int k) +template +std::enable_if_t, T> ldexp(T x, int k) { + return __sycl_std::__invoke_ldexp(x, k); +} + +// svgenfloat lgamma (svgenfloat x) +template +std::enable_if_t, T> lgamma(T x) { + return __sycl_std::__invoke_lgamma(x); +} + +// svgenfloat lgamma_r (svgenfloat x, genintptr signp) +template +std::enable_if_t && detail::is_genintptr_v, T> +lgamma_r(T x, T2 signp) { + detail::check_vector_size(); + return __sycl_std::__invoke_lgamma_r(x, signp); +} + +// svgenfloat log (svgenfloat x) +template std::enable_if_t<__FAST_MATH_GENFLOAT(T), T> log(T x) { + return __sycl_std::__invoke_log(x); +} + +// svgenfloat log2 (svgenfloat x) +template std::enable_if_t<__FAST_MATH_GENFLOAT(T), T> log2(T x) { + return __sycl_std::__invoke_log2(x); +} + +// svgenfloat log10 (svgenfloat x) +template std::enable_if_t<__FAST_MATH_GENFLOAT(T), T> log10(T x) { + return __sycl_std::__invoke_log10(x); +} + +// svgenfloat log1p (svgenfloat x) +template +std::enable_if_t, T> log1p(T x) { + return __sycl_std::__invoke_log1p(x); +} + +// svgenfloat logb (svgenfloat x) +template +std::enable_if_t, T> logb(T x) { + return __sycl_std::__invoke_logb(x); +} + +// svgenfloat mad (svgenfloat a, svgenfloat b, svgenfloat c) +template +std::enable_if_t, T> mad(T a, T b, T c) { + return __sycl_std::__invoke_mad(a, b, c); +} + +// svgenfloat maxmag (svgenfloat x, svgenfloat y) +template +std::enable_if_t, T> maxmag(T x, T y) { + return __sycl_std::__invoke_maxmag(x, y); +} + +// svgenfloat minmag (svgenfloat x, svgenfloat y) +template +std::enable_if_t, T> minmag(T x, T y) { + return __sycl_std::__invoke_minmag(x, y); +} + +// svgenfloat modf (svgenfloat x, genfloatptr iptr) +template +std::enable_if_t && detail::is_genfloatptr_v, T> +modf(T x, T2 iptr) { + detail::check_vector_size(); + return __sycl_std::__invoke_modf(x, iptr); +} + +template , T>> +detail::nan_return_t nan(T nancode) { + return __sycl_std::__invoke_nan>( + detail::convert_data_type>()(nancode)); +} + +// svgenfloat nextafter (svgenfloat x, svgenfloat y) +template +std::enable_if_t, T> nextafter(T x, T y) { + return __sycl_std::__invoke_nextafter(x, y); +} + +// svgenfloat pow (svgenfloat x, svgenfloat y) +template +std::enable_if_t, T> pow(T x, T y) { + return __sycl_std::__invoke_pow(x, y); +} + +// svgenfloat pown (svgenfloat x, genint y) +template +std::enable_if_t && detail::is_genint_v, T> +pown(T x, T2 y) { + detail::check_vector_size(); + return __sycl_std::__invoke_pown(x, y); +} + +// svgenfloat powr (svgenfloat x, svgenfloat y) +template +std::enable_if_t<__FAST_MATH_GENFLOAT(T), T> powr(T x, T y) { + return __sycl_std::__invoke_powr(x, y); +} + +// svgenfloat remainder (svgenfloat x, svgenfloat y) +template +std::enable_if_t, T> remainder(T x, T y) { + return __sycl_std::__invoke_remainder(x, y); +} + +// svgenfloat remquo (svgenfloat x, svgenfloat y, genintptr quo) +template +std::enable_if_t && detail::is_genintptr_v, T> +remquo(T x, T y, T2 quo) { + detail::check_vector_size(); + return __sycl_std::__invoke_remquo(x, y, quo); +} + +// svgenfloat rint (svgenfloat x) +template +std::enable_if_t, T> rint(T x) { + return __sycl_std::__invoke_rint(x); +} + +// svgenfloat rootn (svgenfloat x, genint y) +template +std::enable_if_t && detail::is_genint_v, T> +rootn(T x, T2 y) { + detail::check_vector_size(); + return __sycl_std::__invoke_rootn(x, y); +} + +// svgenfloat round (svgenfloat x) +template +std::enable_if_t, T> round(T x) { + return __sycl_std::__invoke_round(x); +} + +// svgenfloat rsqrt (svgenfloat x) +template std::enable_if_t<__FAST_MATH_GENFLOAT(T), T> rsqrt(T x) { + return __sycl_std::__invoke_rsqrt(x); +} + +// svgenfloat sin (svgenfloat x) +template std::enable_if_t<__FAST_MATH_GENFLOAT(T), T> sin(T x) { + return __sycl_std::__invoke_sin(x); +} + +// svgenfloat sincos (svgenfloat x, genfloatptr cosval) +template +std::enable_if_t<__FAST_MATH_GENFLOAT(T) && detail::is_genfloatptr_v, T> +sincos(T x, T2 cosval) { + detail::check_vector_size(); + return __sycl_std::__invoke_sincos(x, cosval); +} + +// svgenfloat sinh (svgenfloat x) +template +std::enable_if_t, T> sinh(T x) { + return __sycl_std::__invoke_sinh(x); +} + +// svgenfloat sinpi (svgenfloat x) +template +std::enable_if_t, T> sinpi(T x) { + return __sycl_std::__invoke_sinpi(x); +} + +// svgenfloat sqrt (svgenfloat x) +template std::enable_if_t<__FAST_MATH_GENFLOAT(T), T> sqrt(T x) { + return __sycl_std::__invoke_sqrt(x); +} + +// svgenfloat tan (svgenfloat x) +template std::enable_if_t<__FAST_MATH_GENFLOAT(T), T> tan(T x) { + return __sycl_std::__invoke_tan(x); +} + +// svgenfloat tanh (svgenfloat x) +template +std::enable_if_t, T> tanh(T x) { + return __sycl_std::__invoke_tanh(x); +} + +// svgenfloat tanpi (svgenfloat x) +template +std::enable_if_t, T> tanpi(T x) { + return __sycl_std::__invoke_tanpi(x); +} + +// svgenfloat tgamma (svgenfloat x) +template +std::enable_if_t, T> tgamma(T x) { + return __sycl_std::__invoke_tgamma(x); +} + +// svgenfloat trunc (svgenfloat x) +template +std::enable_if_t, T> trunc(T x) { + return __sycl_std::__invoke_trunc(x); +} + +/* --------------- 4.13.5 Common functions. ---------------------------------*/ +// svgenfloat clamp (svgenfloat x, svgenfloat minval, svgenfloat maxval) +template +std::enable_if_t, T> clamp(T x, T minval, T maxval) { + return __sycl_std::__invoke_fclamp(x, minval, maxval); +} + +// svgenfloat degrees (svgenfloat radians) +template +std::enable_if_t, T> degrees(T radians) { + return __sycl_std::__invoke_degrees(radians); +} + +// svgenfloat abs (svgenfloat x) +template +__SYCL_DEPRECATED("abs for floating point types is non-standard and has been " + "deprecated. Please use fabs instead.") +std::enable_if_t, T> abs(T x) { + return __sycl_std::__invoke_fabs(x); +} + +// svgenfloat max (svgenfloat x, svgenfloat y) +template +std::enable_if_t, T>(max)(T x, T y) { + return __sycl_std::__invoke_fmax_common(x, y); +} + +// svgenfloat min (svgenfloat x, svgenfloat y) +template +std::enable_if_t, T>(min)(T x, T y) { + return __sycl_std::__invoke_fmin_common(x, y); +} + +// svgenfloat mix (svgenfloat x, svgenfloat y, svgenfloat a) +template +std::enable_if_t, T> mix(T x, T y, T a) { + return __sycl_std::__invoke_mix(x, y, a); +} + +// svgenfloat radians (svgenfloat degrees) +template +std::enable_if_t, T> radians(T degrees) { + return __sycl_std::__invoke_radians(degrees); +} + +// svgenfloat step (svgenfloat edge, svgenfloat x) +template +std::enable_if_t, T> step(T edge, T x) { + return __sycl_std::__invoke_step(edge, x); +} + +// svgenfloat smoothstep (svgenfloat edge0, svgenfloat edge1, svgenfloat x) +template +std::enable_if_t, T> smoothstep(T edge0, T edge1, + T x) { + return __sycl_std::__invoke_smoothstep(edge0, edge1, x); +} + +// svgenfloat sign (svgenfloat x) +template +std::enable_if_t, T> sign(T x) { + return __sycl_std::__invoke_sign(x); +} + +/* --------------- 4.13.4 Integer functions. --------------------------------*/ +// ugeninteger abs (geninteger x) +template +std::enable_if_t, T> abs(T x) { + return __sycl_std::__invoke_u_abs(x); +} + +// ugeninteger abs_diff (geninteger x, geninteger y) +template +std::enable_if_t, T> abs_diff(T x, T y) { + return __sycl_std::__invoke_u_abs_diff(x, y); +} + +// ugeninteger abs_diff (geninteger x, geninteger y) +template +std::enable_if_t, detail::make_unsigned_t> +abs_diff(T x, T y) { + return __sycl_std::__invoke_s_abs_diff>(x, y); +} + +// geninteger add_sat (geninteger x, geninteger y) +template +std::enable_if_t, T> add_sat(T x, T y) { + return __sycl_std::__invoke_s_add_sat(x, y); +} + +// geninteger add_sat (geninteger x, geninteger y) +template +std::enable_if_t, T> add_sat(T x, T y) { + return __sycl_std::__invoke_u_add_sat(x, y); +} + +// geninteger hadd (geninteger x, geninteger y) +template +std::enable_if_t, T> hadd(T x, T y) { + return __sycl_std::__invoke_s_hadd(x, y); +} + +// geninteger hadd (geninteger x, geninteger y) +template +std::enable_if_t, T> hadd(T x, T y) { + return __sycl_std::__invoke_u_hadd(x, y); +} + +// geninteger rhadd (geninteger x, geninteger y) +template +std::enable_if_t, T> rhadd(T x, T y) { + return __sycl_std::__invoke_s_rhadd(x, y); +} + +// geninteger rhadd (geninteger x, geninteger y) +template +std::enable_if_t, T> rhadd(T x, T y) { + return __sycl_std::__invoke_u_rhadd(x, y); +} + +// geninteger clamp (geninteger x, geninteger minval, geninteger maxval) +template +std::enable_if_t, T> clamp(T x, T minval, + T maxval) { + return __sycl_std::__invoke_s_clamp(x, minval, maxval); +} + +// geninteger clamp (geninteger x, geninteger minval, geninteger maxval) +template +std::enable_if_t, T> clamp(T x, T minval, + T maxval) { + return __sycl_std::__invoke_u_clamp(x, minval, maxval); +} + +// geninteger clz (geninteger x) +template std::enable_if_t, T> clz(T x) { + return __sycl_std::__invoke_clz(x); +} + +// geninteger ctz (geninteger x) +template std::enable_if_t, T> ctz(T x) { + return __sycl_std::__invoke_ctz(x); +} + +// geninteger mad_hi (geninteger a, geninteger b, geninteger c) +template +std::enable_if_t, T> mad_hi(T x, T y, T z) { + return __sycl_std::__invoke_s_mad_hi(x, y, z); +} + +// geninteger mad_hi (geninteger a, geninteger b, geninteger c) +template +std::enable_if_t, T> mad_hi(T x, T y, T z) { + return __sycl_std::__invoke_u_mad_hi(x, y, z); +} + +// geninteger mad_sat (geninteger a, geninteger b, geninteger c) +template +std::enable_if_t, T> mad_sat(T a, T b, T c) { + return __sycl_std::__invoke_s_mad_sat(a, b, c); +} + +// geninteger mad_sat (geninteger a, geninteger b, geninteger c) +template +std::enable_if_t, T> mad_sat(T a, T b, T c) { + return __sycl_std::__invoke_u_mad_sat(a, b, c); +} + +// igeninteger max (igeninteger x, igeninteger y) +template +std::enable_if_t, T>(max)(T x, T y) { + return __sycl_std::__invoke_s_max(x, y); +} + +// ugeninteger max (ugeninteger x, ugeninteger y) +template +std::enable_if_t, T>(max)(T x, T y) { + return __sycl_std::__invoke_u_max(x, y); +} + +// igeninteger min (igeninteger x, igeninteger y) +template +std::enable_if_t, T>(min)(T x, T y) { + return __sycl_std::__invoke_s_min(x, y); +} + +// ugeninteger min (ugeninteger x, ugeninteger y) +template +std::enable_if_t, T>(min)(T x, T y) { + return __sycl_std::__invoke_u_min(x, y); +} + +// geninteger mul_hi (geninteger x, geninteger y) +template +std::enable_if_t, T> mul_hi(T x, T y) { + return __sycl_std::__invoke_s_mul_hi(x, y); +} + +// geninteger mul_hi (geninteger x, geninteger y) +template +std::enable_if_t, T> mul_hi(T x, T y) { + return __sycl_std::__invoke_u_mul_hi(x, y); +} + +// geninteger rotate (geninteger v, geninteger i) +template +std::enable_if_t, T> rotate(T v, T i) { + return __sycl_std::__invoke_rotate(v, i); +} + +// geninteger sub_sat (geninteger x, geninteger y) +template +std::enable_if_t, T> sub_sat(T x, T y) { + return __sycl_std::__invoke_s_sub_sat(x, y); +} + +// geninteger sub_sat (geninteger x, geninteger y) +template +std::enable_if_t, T> sub_sat(T x, T y) { + return __sycl_std::__invoke_u_sub_sat(x, y); +} + +// ugeninteger16bit upsample (ugeninteger8bit hi, ugeninteger8bit lo) +template +std::enable_if_t, detail::make_larger_t> +upsample(T hi, T lo) { + return __sycl_std::__invoke_u_upsample>(hi, lo); +} + +// igeninteger16bit upsample (igeninteger8bit hi, ugeninteger8bit lo) +template +std::enable_if_t && + detail::is_ugeninteger8bit_v, + detail::make_larger_t> +upsample(T hi, T2 lo) { + detail::check_vector_size(); + return __sycl_std::__invoke_s_upsample>(hi, lo); +} + +// ugeninteger32bit upsample (ugeninteger16bit hi, ugeninteger16bit lo) +template +std::enable_if_t, detail::make_larger_t> +upsample(T hi, T lo) { + return __sycl_std::__invoke_u_upsample>(hi, lo); +} + +// igeninteger32bit upsample (igeninteger16bit hi, ugeninteger16bit lo) +template +std::enable_if_t && + detail::is_ugeninteger16bit_v, + detail::make_larger_t> +upsample(T hi, T2 lo) { + detail::check_vector_size(); + return __sycl_std::__invoke_s_upsample>(hi, lo); +} + +// ugeninteger64bit upsample (ugeninteger32bit hi, ugeninteger32bit lo) +template +std::enable_if_t, detail::make_larger_t> +upsample(T hi, T lo) { + return __sycl_std::__invoke_u_upsample>(hi, lo); +} + +// igeninteger64bit upsample (igeninteger32bit hi, ugeninteger32bit lo) +template +std::enable_if_t && + detail::is_ugeninteger32bit_v, + detail::make_larger_t> +upsample(T hi, T2 lo) { + detail::check_vector_size(); + return __sycl_std::__invoke_s_upsample>(hi, lo); +} + +// geninteger popcount (geninteger x) +template +std::enable_if_t, T> popcount(T x) { + return __sycl_std::__invoke_popcount(x); +} + +// geninteger32bit mad24 (geninteger32bit x, geninteger32bit y, +// geninteger32bit z) +template +std::enable_if_t, T> mad24(T x, T y, T z) { + return __sycl_std::__invoke_s_mad24(x, y, z); +} + +// geninteger32bit mad24 (geninteger32bit x, geninteger32bit y, +// geninteger32bit z) +template +std::enable_if_t, T> mad24(T x, T y, T z) { + return __sycl_std::__invoke_u_mad24(x, y, z); +} + +// geninteger32bit mul24 (geninteger32bit x, geninteger32bit y) +template +std::enable_if_t, T> mul24(T x, T y) { + return __sycl_std::__invoke_s_mul24(x, y); +} + +// geninteger32bit mul24 (geninteger32bit x, geninteger32bit y) +template +std::enable_if_t, T> mul24(T x, T y) { + return __sycl_std::__invoke_u_mul24(x, y); +} + +/* --------------- 4.13.6 Geometric Functions. ------------------------------*/ +// float dot (float p0, float p1) +// double dot (double p0, double p1) +// half dot (half p0, half p1) +template +std::enable_if_t, T> dot(T p0, T p1) { + return p0 * p1; +} + +/* SYCL 1.2.1 ---- 4.13.7 Relational functions. -----------------------------*/ +/* SYCL 2020 ---- 4.17.9 Relational functions. -----------------------------*/ + +template , T>> +detail::common_rel_ret_t isequal(T x, T y) { + return detail::RelConverter::apply( + __sycl_std::__invoke_FOrdEqual>(x, y)); +} + +template , T>> +detail::common_rel_ret_t isnotequal(T x, T y) { + return detail::RelConverter::apply( + __sycl_std::__invoke_FUnordNotEqual>(x, y)); +} + +template , T>> +detail::common_rel_ret_t isgreater(T x, T y) { + return detail::RelConverter::apply( + __sycl_std::__invoke_FOrdGreaterThan>(x, + y)); +} + +template , T>> +detail::common_rel_ret_t isgreaterequal(T x, T y) { + return detail::RelConverter::apply( + __sycl_std::__invoke_FOrdGreaterThanEqual>( + x, y)); +} + +template , T>> +detail::common_rel_ret_t isless(T x, T y) { + return detail::RelConverter::apply( + __sycl_std::__invoke_FOrdLessThan>(x, y)); +} + +template , T>> +detail::common_rel_ret_t islessequal(T x, T y) { + return detail::RelConverter::apply( + __sycl_std::__invoke_FOrdLessThanEqual>(x, + y)); +} + +template , T>> +detail::common_rel_ret_t islessgreater(T x, T y) { + return detail::RelConverter::apply( + __sycl_std::__invoke_FOrdNotEqual>(x, y)); +} + +template , T>> +detail::common_rel_ret_t isfinite(T x) { + return detail::RelConverter::apply( + __sycl_std::__invoke_IsFinite>(x)); +} + +template , T>> +detail::common_rel_ret_t isinf(T x) { + return detail::RelConverter::apply( + __sycl_std::__invoke_IsInf>(x)); +} + +template , T>> +detail::common_rel_ret_t isnan(T x) { + return detail::RelConverter::apply( + __sycl_std::__invoke_IsNan>(x)); +} + +template , T>> +detail::common_rel_ret_t isnormal(T x) { + return detail::RelConverter::apply( + __sycl_std::__invoke_IsNormal>(x)); +} + +template , T>> +detail::common_rel_ret_t isordered(T x, T y) { + return detail::RelConverter::apply( + __sycl_std::__invoke_Ordered>(x, y)); +} + +template , T>> +detail::common_rel_ret_t isunordered(T x, T y) { + return detail::RelConverter::apply( + __sycl_std::__invoke_Unordered>(x, y)); +} + +template , T>> +detail::common_rel_ret_t signbit(T x) { + return detail::RelConverter::apply( + __sycl_std::__invoke_SignBitSet>(x)); +} + +// bool any (sigeninteger x) +template +std::enable_if_t, bool> any(T x) { + return detail::msbIsSet(x); +} + +// bool all (sigeninteger x) +template +std::enable_if_t, bool> all(T x) { + return detail::msbIsSet(x); +} + +// gentype bitselect (gentype a, gentype b, gentype c) +template +std::enable_if_t, T> bitselect(T a, T b, T c) { + return __sycl_std::__invoke_bitselect(a, b, c); +} + +// sgentype select (sgentype a, sgentype b, bool c) +template +std::enable_if_t, T> select(T a, T b, bool c) { + constexpr size_t SizeT = sizeof(T); + + // sycl::select(sgentype a, sgentype b, bool c) calls OpenCL built-in + // select(sgentype a, sgentype b, igentype c). This type trait makes the + // proper conversion for argument c from bool to igentype, based on sgentype + // == T. + using get_select_opencl_builtin_c_arg_type = typename std::conditional_t< + SizeT == 1, char, + std::conditional_t< + SizeT == 2, short, + std::conditional_t< + (detail::is_contained< + T, detail::type_list>::value && + (SizeT == 4 || SizeT == 8)), + long, // long and ulong are 32-bit on + // Windows and 64-bit on Linux + std::conditional_t< + SizeT == 4, int, + std::conditional_t>>>>; + + return __sycl_std::__invoke_select( + a, b, static_cast(c)); +} + +// geninteger select (geninteger a, geninteger b, igeninteger c) +template +std::enable_if_t && detail::is_igeninteger_v, T> +select(T a, T b, T2 c) { + detail::check_vector_size(); + return __sycl_std::__invoke_select(a, b, c); +} + +// geninteger select (geninteger a, geninteger b, ugeninteger c) +template +std::enable_if_t && detail::is_ugeninteger_v, T> +select(T a, T b, T2 c) { + detail::check_vector_size(); + return __sycl_std::__invoke_select(a, b, c); +} + +// svgenfloatf select (svgenfloatf a, svgenfloatf b, genint c) +template +std::enable_if_t && detail::is_genint_v, T> +select(T a, T b, T2 c) { + detail::check_vector_size(); + return __sycl_std::__invoke_select(a, b, c); +} + +// svgenfloatf select (svgenfloatf a, svgenfloatf b, ugenint c) +template +std::enable_if_t && detail::is_ugenint_v, T> +select(T a, T b, T2 c) { + detail::check_vector_size(); + return __sycl_std::__invoke_select(a, b, c); +} + +// svgenfloatd select (svgenfloatd a, svgenfloatd b, igeninteger64 c) +template +std::enable_if_t< + detail::is_svgenfloatd_v && detail::is_igeninteger64bit_v, T> +select(T a, T b, T2 c) { + detail::check_vector_size(); + return __sycl_std::__invoke_select(a, b, c); +} + +// svgenfloatd select (svgenfloatd a, svgenfloatd b, ugeninteger64 c) +template +std::enable_if_t< + detail::is_svgenfloatd_v && detail::is_ugeninteger64bit_v, T> +select(T a, T b, T2 c) { + detail::check_vector_size(); + return __sycl_std::__invoke_select(a, b, c); +} + +// svgenfloath select (svgenfloath a, svgenfloath b, igeninteger16 c) +template +std::enable_if_t< + detail::is_svgenfloath_v && detail::is_igeninteger16bit_v, T> +select(T a, T b, T2 c) { + detail::check_vector_size(); + return __sycl_std::__invoke_select(a, b, c); +} + +// svgenfloath select (svgenfloath a, svgenfloath b, ugeninteger16 c) +template +std::enable_if_t< + detail::is_svgenfloath_v && detail::is_ugeninteger16bit_v, T> +select(T a, T b, T2 c) { + detail::check_vector_size(); + return __sycl_std::__invoke_select(a, b, c); +} + +namespace native { +/* ----------------- 4.13.3 Math functions. ---------------------------------*/ + +// svgenfloatf cos (svgenfloatf x) +template +std::enable_if_t, T> cos(T x) { + return __sycl_std::__invoke_native_cos(x); +} + +// svgenfloatf divide (svgenfloatf x, svgenfloatf y) +template +std::enable_if_t, T> divide(T x, T y) { + return __sycl_std::__invoke_native_divide(x, y); +} + +// svgenfloatf exp (svgenfloatf x) +template +std::enable_if_t, T> exp(T x) { + return __sycl_std::__invoke_native_exp(x); +} + +// svgenfloatf exp2 (svgenfloatf x) +template +std::enable_if_t, T> exp2(T x) { + return __sycl_std::__invoke_native_exp2(x); +} + +// svgenfloatf exp10 (svgenfloatf x) +template +std::enable_if_t, T> exp10(T x) { + return __sycl_std::__invoke_native_exp10(x); +} + +// svgenfloatf log (svgenfloatf x) +template +std::enable_if_t, T> log(T x) { + return __sycl_std::__invoke_native_log(x); +} + +// svgenfloatf log2 (svgenfloatf x) +template +std::enable_if_t, T> log2(T x) { + return __sycl_std::__invoke_native_log2(x); +} + +// svgenfloatf log10 (svgenfloatf x) +template +std::enable_if_t, T> log10(T x) { + return __sycl_std::__invoke_native_log10(x); +} + +// svgenfloatf powr (svgenfloatf x, svgenfloatf y) +template +std::enable_if_t, T> powr(T x, T y) { + return __sycl_std::__invoke_native_powr(x, y); +} + +// svgenfloatf recip (svgenfloatf x) +template +std::enable_if_t, T> recip(T x) { + return __sycl_std::__invoke_native_recip(x); +} + +// svgenfloatf rsqrt (svgenfloatf x) +template +std::enable_if_t, T> rsqrt(T x) { + return __sycl_std::__invoke_native_rsqrt(x); +} + +// svgenfloatf sin (svgenfloatf x) +template +std::enable_if_t, T> sin(T x) { + return __sycl_std::__invoke_native_sin(x); +} + +// svgenfloatf sqrt (svgenfloatf x) +template +std::enable_if_t, T> sqrt(T x) { + return __sycl_std::__invoke_native_sqrt(x); +} + +// svgenfloatf tan (svgenfloatf x) +template +std::enable_if_t, T> tan(T x) { + return __sycl_std::__invoke_native_tan(x); +} + +} // namespace native +namespace half_precision { +/* ----------------- 4.13.3 Math functions. ---------------------------------*/ + +// svgenfloatf cos (svgenfloatf x) +template +std::enable_if_t, T> cos(T x) { + return __sycl_std::__invoke_half_cos(x); +} + +// svgenfloatf divide (svgenfloatf x, svgenfloatf y) +template +std::enable_if_t, T> divide(T x, T y) { + return __sycl_std::__invoke_half_divide(x, y); +} + +// svgenfloatf exp (svgenfloatf x) +template +std::enable_if_t, T> exp(T x) { + return __sycl_std::__invoke_half_exp(x); +} + +// svgenfloatf exp2 (svgenfloatf x) +template +std::enable_if_t, T> exp2(T x) { + return __sycl_std::__invoke_half_exp2(x); +} + +// svgenfloatf exp10 (svgenfloatf x) +template +std::enable_if_t, T> exp10(T x) { + return __sycl_std::__invoke_half_exp10(x); +} + +// svgenfloatf log (svgenfloatf x) +template +std::enable_if_t, T> log(T x) { + return __sycl_std::__invoke_half_log(x); +} + +// svgenfloatf log2 (svgenfloatf x) +template +std::enable_if_t, T> log2(T x) { + return __sycl_std::__invoke_half_log2(x); +} + +// svgenfloatf log10 (svgenfloatf x) +template +std::enable_if_t, T> log10(T x) { + return __sycl_std::__invoke_half_log10(x); +} + +// svgenfloatf powr (svgenfloatf x, svgenfloatf y) +template +std::enable_if_t, T> powr(T x, T y) { + return __sycl_std::__invoke_half_powr(x, y); +} + +// svgenfloatf recip (svgenfloatf x) +template +std::enable_if_t, T> recip(T x) { + return __sycl_std::__invoke_half_recip(x); +} + +// svgenfloatf rsqrt (svgenfloatf x) +template +std::enable_if_t, T> rsqrt(T x) { + return __sycl_std::__invoke_half_rsqrt(x); +} + +// svgenfloatf sin (svgenfloatf x) +template +std::enable_if_t, T> sin(T x) { + return __sycl_std::__invoke_half_sin(x); +} + +// svgenfloatf sqrt (svgenfloatf x) +template +std::enable_if_t, T> sqrt(T x) { + return __sycl_std::__invoke_half_sqrt(x); +} + +// svgenfloatf tan (svgenfloatf x) +template +std::enable_if_t, T> tan(T x) { + return __sycl_std::__invoke_half_tan(x); +} + +} // namespace half_precision + +#ifdef __FAST_MATH__ +/* ----------------- -ffast-math functions. ---------------------------------*/ + +// svgenfloatf cos (svgenfloatf x) +template +std::enable_if_t, T> cos(T x) { + return native::cos(x); +} + +// svgenfloat sincos (svgenfloat x, genfloatptr cosval) +// This is a performance optimization to ensure that sincos isn't slower than a +// pair of sin/cos executed separately. Theoretically, calling non-native sincos +// might be faster than calling native::sin plus native::cos separately and we'd +// need some kind of cost model to make the right decision (and move this +// entirely to the JIT/AOT compilers). However, in practice, this simpler +// solution seems to work just fine and matches how sin/cos above are optimized +// for the fast math path. +template +std::enable_if_t && detail::is_genfloatptr_v, T> +sincos(T x, T2 cosval) { + detail::check_vector_size(); + *cosval = native::cos(x); + return native::sin(x); +} + +// svgenfloatf exp (svgenfloatf x) +template +std::enable_if_t, T> exp(T x) { + return native::exp(x); +} + +// svgenfloatf exp2 (svgenfloatf x) +template +std::enable_if_t, T> exp2(T x) { + return native::exp2(x); +} + +// svgenfloatf exp10 (svgenfloatf x) +template +std::enable_if_t, T> exp10(T x) { + return native::exp10(x); +} + +// svgenfloatf log(svgenfloatf x) +template +std::enable_if_t, T> log(T x) { + return native::log(x); +} + +// svgenfloatf log2 (svgenfloatf x) +template +std::enable_if_t, T> log2(T x) { + return native::log2(x); +} + +// svgenfloatf log10 (svgenfloatf x) +template +std::enable_if_t, T> log10(T x) { + return native::log10(x); +} + +// svgenfloatf powr (svgenfloatf x) +template +std::enable_if_t, T> powr(T x, T y) { + return native::powr(x, y); +} + +// svgenfloatf rsqrt (svgenfloatf x) +template +std::enable_if_t, T> rsqrt(T x) { + return native::rsqrt(x); +} + +// svgenfloatf sin (svgenfloatf x) +template +std::enable_if_t, T> sin(T x) { + return native::sin(x); +} + +// svgenfloatf sqrt (svgenfloatf x) +template +std::enable_if_t, T> sqrt(T x) { + return native::sqrt(x); +} + +// svgenfloatf tan (svgenfloatf x) +template +std::enable_if_t, T> tan(T x) { + return native::tan(x); +} + +#endif // __FAST_MATH__ +} // namespace _V1 +} // namespace sycl diff --git a/sycl/include/sycl/builtins_utils_scalar.hpp b/sycl/include/sycl/builtins_utils_scalar.hpp index 0501e1b607ed1..c6c6067641c20 100644 --- a/sycl/include/sycl/builtins_utils_scalar.hpp +++ b/sycl/include/sycl/builtins_utils_scalar.hpp @@ -32,13 +32,6 @@ namespace __sycl_std = __host_std; #endif namespace detail { -// Get the element type of T. If T is a scalar, the element type is considered -// the type of the scalar. -template struct get_elem_type { - using type = T; -}; - -template using get_elem_type_t = typename get_elem_type::type; #ifdef __FAST_MATH__ template struct use_fast_math diff --git a/sycl/include/sycl/builtins_utils_vec.hpp b/sycl/include/sycl/builtins_utils_vec.hpp index 491e0563cfa5a..0fbe006fae123 100644 --- a/sycl/include/sycl/builtins_utils_vec.hpp +++ b/sycl/include/sycl/builtins_utils_vec.hpp @@ -16,21 +16,6 @@ namespace sycl { inline namespace _V1 { namespace detail { -// Get the element type of T. If T is a scalar, the element type is considered -// the type of the scalar. -template struct get_elem_type> { - using type = T; -}; -template struct get_elem_type> { - using type = T; -}; -template class OperationCurrentT, int... Indexes> -struct get_elem_type> { - using type = typename get_elem_type>::type; -}; - template struct is_swizzle : std::false_type {}; template class OperationCurrentT, int... Indexes> diff --git a/sycl/include/sycl/detail/generic_type_traits.hpp b/sycl/include/sycl/detail/generic_type_traits.hpp index 9feb2d5ffc818..cf2869c34699b 100644 --- a/sycl/include/sycl/detail/generic_type_traits.hpp +++ b/sycl/include/sycl/detail/generic_type_traits.hpp @@ -77,8 +77,7 @@ inline constexpr bool is_svgenfloat_v = template inline constexpr bool is_mgenfloat_v = - std::is_same_v, T::size()>> && - is_svgenfloat_v>; + is_marray_v && is_svgenfloat_v>; template inline constexpr bool is_gengeofloat_v = is_contained_v; @@ -214,14 +213,23 @@ inline constexpr bool is_ugenlonginteger_v = template inline constexpr bool is_geninteger_v = is_contained_v; +template +using is_geninteger = std::bool_constant>; + template inline constexpr bool is_igeninteger_v = is_contained_v; +template +using is_igeninteger = std::bool_constant>; + template inline constexpr bool is_ugeninteger_v = is_contained_v; +template +using is_ugeninteger = std::bool_constant>; + template inline constexpr bool is_sgeninteger_v = is_contained_v; @@ -258,6 +266,54 @@ inline constexpr bool is_vgentype_v = is_contained_v; template inline constexpr bool is_sgentype_v = is_contained_v; +template +inline constexpr bool is_igeninteger8bit_v = + is_gen_based_on_type_sizeof_v; + +template +inline constexpr bool is_igeninteger16bit_v = + is_gen_based_on_type_sizeof_v; + +template +inline constexpr bool is_igeninteger32bit_v = + is_gen_based_on_type_sizeof_v; + +template +inline constexpr bool is_igeninteger64bit_v = + is_gen_based_on_type_sizeof_v; + +template +inline constexpr bool is_ugeninteger8bit_v = + is_gen_based_on_type_sizeof_v; + +template +inline constexpr bool is_ugeninteger16bit_v = + is_gen_based_on_type_sizeof_v; + +template +inline constexpr bool is_ugeninteger32bit_v = + is_gen_based_on_type_sizeof_v; + +template +inline constexpr bool is_ugeninteger64bit_v = + is_gen_based_on_type_sizeof_v; + +template +inline constexpr bool is_geninteger8bit_v = + is_gen_based_on_type_sizeof_v; + +template +inline constexpr bool is_geninteger16bit_v = + is_gen_based_on_type_sizeof_v; + +template +inline constexpr bool is_geninteger32bit_v = + is_gen_based_on_type_sizeof_v; + +template +inline constexpr bool is_geninteger64bit_v = + is_gen_based_on_type_sizeof_v; + template inline constexpr bool is_genintptr_v = is_pointer_v && is_genint_v> && @@ -694,7 +750,16 @@ template using rel_sign_bit_test_arg_t = typename RelationalTestForSignBitType::argument_type; -template struct RelConverter; +template struct RelConverter { + using R = internal_rel_ret_t; +#ifdef __SYCL_DEVICE_ONLY__ + using value_t = bool; +#else + using value_t = R; +#endif + + static R apply(value_t value) { return value; } +}; template struct RelConverter struct RelConverter { - static bool apply(bool value) { return value; } -}; - template static constexpr T max_v() { return (std::numeric_limits::max)(); } diff --git a/sycl/include/sycl/detail/type_traits.hpp b/sycl/include/sycl/detail/type_traits.hpp index f9c61c3b97a55..301093207a4b6 100644 --- a/sycl/include/sycl/detail/type_traits.hpp +++ b/sycl/include/sycl/detail/type_traits.hpp @@ -25,6 +25,10 @@ template struct is_fixed_size_group : std::false_type {}; template inline constexpr bool is_fixed_size_group_v = is_fixed_size_group::value; + +template class OperationCurrentT, int... Indexes> +class SwizzleOp; } // namespace detail template class group; @@ -148,6 +152,26 @@ template using vector_element_t = typename vector_element::type; template using marray_element_t = typename T::value_type; +// get_elem_type +// Get the element type of T. If T is a scalar, the element type is considered +// the type of the scalar. +template struct get_elem_type { + using type = T; +}; +template struct get_elem_type> { + using type = T; +}; +template struct get_elem_type> { + using type = T; +}; +template class OperationCurrentT, int... Indexes> +struct get_elem_type> { + using type = typename get_elem_type>::type; +}; +template using get_elem_type_t = typename get_elem_type::type; + // change_base_type_t template struct change_base_type { using type = B; @@ -215,8 +239,8 @@ template struct make_unsigned> { // Checks that sizeof base type of T equal N and T satisfies S::value template class S> -using is_gen_based_on_type_sizeof = - std::bool_constant::value && (sizeof(vector_element_t) == N)>; +inline constexpr bool is_gen_based_on_type_sizeof_v = + S::value && (sizeof(vector_element_t) == N); template struct is_vec : std::false_type {}; template struct is_vec> : std::true_type {}; diff --git a/sycl/include/sycl/detail/vector_convert.hpp b/sycl/include/sycl/detail/vector_convert.hpp index a03e838669296..adf38c9e4364e 100644 --- a/sycl/include/sycl/detail/vector_convert.hpp +++ b/sycl/include/sycl/detail/vector_convert.hpp @@ -57,7 +57,12 @@ #include // for is_sigeninteger, is_s... #ifndef __SYCL_DEVICE_ONLY__ + +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES #include // for ceil, floor, rint, trunc +#else // __INTEL_PREVIEW_BREAKING_CHANGES +#include // for ceil, floor, rint, trunc +#endif // __INTEL_PREVIEW_BREAKING_CHANGES #include // for fesetround, fegetround #endif diff --git a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp index d12f1be04b156..14b36a29ccd73 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp @@ -10,6 +10,7 @@ #include // for half #include // for to_vec2 +#include // for to_vec, to_marray... #include // for __invoke_exp2, __invo... #include // for __SYCL_ALWAYS_INLINE #include // for is_svgenfloath, is_sv... diff --git a/sycl/include/sycl/ext/oneapi/experimental/complex/detail/complex_math.hpp b/sycl/include/sycl/ext/oneapi/experimental/complex/detail/complex_math.hpp index cca76b7e988a5..5635ab330d300 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/complex/detail/complex_math.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/complex/detail/complex_math.hpp @@ -139,7 +139,7 @@ __DPCPP_SYCL_EXTERNAL _SYCL_EXT_CPLX_INLINE_VISIBILITY typedef typename cplx::detail::__libcpp_complex_overload_traits<_Tp>::_ValueType _ValueType; - return sycl::atan2(static_cast<_ValueType>(0), __re); + return sycl::atan2(static_cast<_ValueType>(0), static_cast<_ValueType>(__re)); } // norm diff --git a/sycl/test-e2e/Basic/built-ins/host_math.cpp b/sycl/test-e2e/Basic/built-ins/host_math.cpp index 52ddcb91b21a2..67d1d85900c1f 100644 --- a/sycl/test-e2e/Basic/built-ins/host_math.cpp +++ b/sycl/test-e2e/Basic/built-ins/host_math.cpp @@ -1,5 +1,7 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out +// RUN: %if preview-breaking-changes-supported %{%{build} -o -fpreview %t_preview.out%} +// RUN: %if preview-breaking-changes-supported %{%{run} %t_preview.out%} #include #include diff --git a/sycl/test-e2e/Basic/built-ins/marray_common.cpp b/sycl/test-e2e/Basic/built-ins/marray_common.cpp index bac942ee6503d..1fc5b098688bd 100644 --- a/sycl/test-e2e/Basic/built-ins/marray_common.cpp +++ b/sycl/test-e2e/Basic/built-ins/marray_common.cpp @@ -1,5 +1,7 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out +// RUN: %if preview-breaking-changes-supported %{%{build} -o -fpreview %t_preview.out%} +// RUN: %if preview-breaking-changes-supported %{%{run} %t_preview.out%} #ifdef _WIN32 #define _USE_MATH_DEFINES // To use math constants diff --git a/sycl/test-e2e/Basic/built-ins/marray_geometric.cpp b/sycl/test-e2e/Basic/built-ins/marray_geometric.cpp index 70cc0ae0ae749..d2feb1a497ce2 100644 --- a/sycl/test-e2e/Basic/built-ins/marray_geometric.cpp +++ b/sycl/test-e2e/Basic/built-ins/marray_geometric.cpp @@ -1,5 +1,7 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out +// RUN: %if preview-breaking-changes-supported %{%{build} -o -fpreview %t_preview.out%} +// RUN: %if preview-breaking-changes-supported %{%{run} %t_preview.out%} #include diff --git a/sycl/test-e2e/Basic/built-ins/marray_math.cpp b/sycl/test-e2e/Basic/built-ins/marray_math.cpp index d102ded0d4f74..30f0f067b438b 100644 --- a/sycl/test-e2e/Basic/built-ins/marray_math.cpp +++ b/sycl/test-e2e/Basic/built-ins/marray_math.cpp @@ -2,6 +2,8 @@ // RUN: %{build} %{mathflags} -o %t.out // RUN: %{run} %t.out +// RUN: %if preview-breaking-changes-supported %{%{build} %{mathflags} -o -fpreview %t_preview.out%} +// RUN: %if preview-breaking-changes-supported %{%{run} %t_preview.out%} #include diff --git a/sycl/test-e2e/Basic/built-ins/marray_relational.cpp b/sycl/test-e2e/Basic/built-ins/marray_relational.cpp index 765d28cd49097..2ea420dad8302 100644 --- a/sycl/test-e2e/Basic/built-ins/marray_relational.cpp +++ b/sycl/test-e2e/Basic/built-ins/marray_relational.cpp @@ -1,5 +1,7 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out +// RUN: %if preview-breaking-changes-supported %{%{build} -o -fpreview %t_preview.out%} +// RUN: %if preview-breaking-changes-supported %{%{run} %t_preview.out%} #include diff --git a/sycl/test-e2e/Basic/built-ins/vec_common.cpp b/sycl/test-e2e/Basic/built-ins/vec_common.cpp index f56f673ab1cd0..fe516d090a2ad 100644 --- a/sycl/test-e2e/Basic/built-ins/vec_common.cpp +++ b/sycl/test-e2e/Basic/built-ins/vec_common.cpp @@ -1,5 +1,7 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out +// RUN: %if preview-breaking-changes-supported %{%{build} -o -fpreview %t_preview.out%} +// RUN: %if preview-breaking-changes-supported %{%{run} %t_preview.out%} // RUN: %if preview-breaking-changes-supported %{ %clangxx -fsycl -fpreview-breaking-changes %s -o %t2.out %} // RUN: %if preview-breaking-changes-supported %{ %{run} %t2.out %} @@ -58,6 +60,88 @@ int main() { // sycl::clamp TEST(sycl::clamp, float, 2, EXPECTED(float, 1.0f, 2.0f), 0, va1, va2, va3); + TEST(sycl::clamp, float, 2, EXPECTED(float, 1.0f, 2.0f), 0, va1, 1.0f, 3.0f); + if (dev.has(sycl::aspect::fp64)) { + TEST(sycl::clamp, double, 2, EXPECTED(double, 1.0, 2.0), 0, va4, 1.0, 3.0); + } + // sycl::degrees + TEST(sycl::degrees, float, 3, EXPECTED(float, 180, 180, 180), 0, va5); + if (dev.has(sycl::aspect::fp64)) { + TEST(sycl::degrees, double, 3, EXPECTED(double, 180, 180, 180), 0, va6); + } + if (dev.has(sycl::aspect::fp16)) { + TEST(sycl::degrees, sycl::half, 3, EXPECTED(sycl::half, 180, 180, 180), 0.2, + va7); + } + // sycl::max + TEST(sycl::max, float, 2, EXPECTED(float, 3.0f, 2.0f), 0, va1, va3); + TEST(sycl::max, float, 2, EXPECTED(float, 1.5f, 2.0f), 0, va1, 1.5f); + if (dev.has(sycl::aspect::fp64)) { + TEST(sycl::max, double, 2, EXPECTED(double, 1.5, 2.0), 0, va4, 1.5); + } + // sycl::min + TEST(sycl::min, float, 2, EXPECTED(float, 1.0f, 2.0f), 0, va1, va3); + TEST(sycl::min, float, 2, EXPECTED(float, 1.0f, 1.5f), 0, va1, 1.5f); + if (dev.has(sycl::aspect::fp64)) { + TEST(sycl::min, double, 2, EXPECTED(double, 1.0, 1.5), 0, va4, 1.5); + } + // sycl::mix + TEST(sycl::mix, float, 2, EXPECTED(float, 1.6f, 2.0f), 0, va1, va3, va8); + TEST(sycl::mix, float, 2, EXPECTED(float, 1.4f, 2.0f), 0, va1, va3, 0.2); + if (dev.has(sycl::aspect::fp64)) { + TEST(sycl::mix, double, 2, EXPECTED(double, 3.0, 5.0), 0, va4, va9, 0.5); + } + // sycl::radians + TEST(sycl::radians, float, 3, EXPECTED(float, M_PI, M_PI, M_PI), 0, va10); + if (dev.has(sycl::aspect::fp64)) { + TEST(sycl::radians, double, 3, EXPECTED(double, M_PI, M_PI, M_PI), 0, va11); + } + if (dev.has(sycl::aspect::fp16)) { + TEST(sycl::radians, sycl::half, 3, EXPECTED(sycl::half, M_PI, M_PI, M_PI), + 0.002, va12); + } + // sycl::step + TEST(sycl::step, float, 2, EXPECTED(float, 1.0f, 1.0f), 0, va1, va3); + if (dev.has(sycl::aspect::fp64)) { + TEST(sycl::step, double, 2, EXPECTED(double, 1.0, 1.0), 0, va4, va9); + } + if (dev.has(sycl::aspect::fp16)) { + TEST(sycl::step, sycl::half, 3, EXPECTED(sycl::half, 1.0, 0.0, 1.0), 0, + va12, va13); + } + TEST(sycl::step, float, 2, EXPECTED(float, 1.0f, 0.0f), 0, 2.5f, va3); + if (dev.has(sycl::aspect::fp64)) { + TEST(sycl::step, double, 2, EXPECTED(double, 0.0f, 1.0f), 0, 6.0f, va9); + } + // sycl::smoothstep + TEST(sycl::smoothstep, float, 2, EXPECTED(float, 1.0f, 1.0f), 0, va8, va1, + va2); + if (dev.has(sycl::aspect::fp64)) { + TEST(sycl::smoothstep, double, 2, EXPECTED(double, 1.0, 1.0f), 0.00000001, + va4, va9, va9); + } + if (dev.has(sycl::aspect::fp16)) { + TEST(sycl::smoothstep, sycl::half, 3, EXPECTED(sycl::half, 1.0, 1.0, 1.0), + 0, va7, va12, va13); + } + TEST(sycl::smoothstep, float, 2, EXPECTED(float, 0.0553936f, 0.0f), 0.0000001, + 2.5f, 6.0f, va3); + if (dev.has(sycl::aspect::fp64)) { + TEST(sycl::smoothstep, double, 2, EXPECTED(double, 0.0f, 1.0f), 0, 6.0f, + 8.0f, va9); + } + // sign + TEST(sycl::sign, float, 2, EXPECTED(float, +0.0f, -1.0f), 0, va14); + if (dev.has(sycl::aspect::fp64)) { + TEST(sycl::sign, double, 2, EXPECTED(double, -0.0, 1.0), 0, va15); + } + if (dev.has(sycl::aspect::fp16)) { + TEST(sycl::sign, sycl::half, 3, EXPECTED(sycl::half, 1.0, 1.0, 1.0), 0, + va12); + } + +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES + // sycl::clamp swizzled TEST(sycl::clamp, float, 2, EXPECTED(float, 3.0f, 2.0f), 0, va16.swizzle<1, 0>(), va2, va3); TEST(sycl::clamp, float, 2, EXPECTED(float, 1.0f, 2.0f), 0, va1, va2, @@ -68,59 +152,46 @@ int main() { va16.swizzle<1, 0>(), va16.swizzle<1, 0>()); TEST(sycl::clamp, float, 2, EXPECTED(float, 360.0f, 180.0f), 0, va16.swizzle<1, 0>(), va16.swizzle<1, 0>(), va16.swizzle<1, 0>()); - TEST(sycl::clamp, float, 2, EXPECTED(float, 1.0f, 2.0f), 0, va1, 1.0f, 3.0f); TEST(sycl::clamp, float, 2, EXPECTED(float, 3.0f, 3.0f), 0, va16.swizzle<1, 0>(), 1.0f, 3.0f); if (dev.has(sycl::aspect::fp64)) { - TEST(sycl::clamp, double, 2, EXPECTED(double, 1.0, 2.0), 0, va4, 1.0, 3.0); TEST(sycl::clamp, double, 2, EXPECTED(double, 3.0, 3.0), 0, va11.swizzle<1, 0>(), 1.0, 3.0); } - // sycl::degrees - TEST(sycl::degrees, float, 3, EXPECTED(float, 180, 180, 180), 0, va5); + // sycl::degrees swizzled TEST(sycl::degrees, float, 2, EXPECTED(float, 180, 180), 0, va5.swizzle<1, 0>()); if (dev.has(sycl::aspect::fp64)) { - TEST(sycl::degrees, double, 3, EXPECTED(double, 180, 180, 180), 0, va6); TEST(sycl::degrees, double, 2, EXPECTED(double, 180, 180), 0, va6.swizzle<1, 0>()); } if (dev.has(sycl::aspect::fp16)) { - TEST(sycl::degrees, sycl::half, 3, EXPECTED(sycl::half, 180, 180, 180), 0.2, - va7); TEST(sycl::degrees, sycl::half, 2, EXPECTED(sycl::half, 180, 180), 0.2, va7.swizzle<1, 0>()); } - // sycl::max - TEST(sycl::max, float, 2, EXPECTED(float, 3.0f, 2.0f), 0, va1, va3); + // sycl::max swizzled TEST(sycl::max, float, 2, EXPECTED(float, 360.0f, 180.0f), 0, va16.swizzle<1, 0>(), va3); TEST(sycl::max, float, 2, EXPECTED(float, 360.0f, 180.0f), 0, va1, va16.swizzle<1, 0>()); - TEST(sycl::max, float, 2, EXPECTED(float, 1.5f, 2.0f), 0, va1, 1.5f); TEST(sycl::max, float, 2, EXPECTED(float, 360.0f, 190.0f), 0, va16.swizzle<1, 0>(), 190.0f); if (dev.has(sycl::aspect::fp64)) { - TEST(sycl::max, double, 2, EXPECTED(double, 1.5, 2.0), 0, va4, 1.5); TEST(sycl::max, double, 2, EXPECTED(double, 360.0, 190.0), 0, va17.swizzle<1, 0>(), 190.0); } - // sycl::min - TEST(sycl::min, float, 2, EXPECTED(float, 1.0f, 2.0f), 0, va1, va3); + // sycl::min swizzled TEST(sycl::min, float, 2, EXPECTED(float, 3.0f, 2.0f), 0, va16.swizzle<1, 0>(), va3); TEST(sycl::min, float, 2, EXPECTED(float, 1.0f, 2.0f), 0, va1, va16.swizzle<1, 0>()); - TEST(sycl::min, float, 2, EXPECTED(float, 1.0f, 1.5f), 0, va1, 1.5f); TEST(sycl::min, float, 2, EXPECTED(float, 190.0f, 180.0f), 0, va16.swizzle<1, 0>(), 190.0f); if (dev.has(sycl::aspect::fp64)) { - TEST(sycl::min, double, 2, EXPECTED(double, 1.0, 1.5), 0, va4, 1.5); TEST(sycl::min, double, 2, EXPECTED(double, 190.0f, 180.0f), 0, va17.swizzle<1, 0>(), 190.0); } - // sycl::mix - TEST(sycl::mix, float, 2, EXPECTED(float, 1.6f, 2.0f), 0, va1, va3, va8); + // sycl::mix swizzled TEST(sycl::mix, float, 2, EXPECTED(float, 252.9f, 73.2f), 0, va16.swizzle<1, 0>(), va3, va8); TEST(sycl::mix, float, 2, EXPECTED(float, 252.9f, 73.2f), 0, @@ -135,9 +206,7 @@ int main() { va16.swizzle<1, 0>(), va16.swizzle<1, 0>(), va18.swizzle<0, 1>()); TEST(sycl::mix, float, 2, EXPECTED(float, 1.6f, 2.0f), 0, va1, va3, va18.swizzle<0, 1>()); - TEST(sycl::mix, float, 2, EXPECTED(float, 1.4f, 2.0f), 0, va1, va3, 0.2); if (dev.has(sycl::aspect::fp64)) { - TEST(sycl::mix, double, 2, EXPECTED(double, 3.0, 5.0), 0, va4, va9, 0.5); TEST(sycl::mix, double, 2, EXPECTED(double, 182.5, 94.0), 0, va17.swizzle<1, 0>(), va9, 0.5); TEST(sycl::mix, double, 2, EXPECTED(double, 180.5, 91.0), 0, va4, @@ -145,23 +214,18 @@ int main() { TEST(sycl::mix, double, 2, EXPECTED(double, 360.0, 180.0), 0, va17.swizzle<1, 0>(), va17.swizzle<1, 0>(), 0.5); } - // sycl::radians - TEST(sycl::radians, float, 3, EXPECTED(float, M_PI, M_PI, M_PI), 0, va10); + // sycl::radians swizzled TEST(sycl::radians, float, 2, EXPECTED(float, M_PI, M_PI), 0, va10.swizzle<1, 0>()); if (dev.has(sycl::aspect::fp64)) { - TEST(sycl::radians, double, 3, EXPECTED(double, M_PI, M_PI, M_PI), 0, va11); TEST(sycl::radians, double, 2, EXPECTED(double, M_PI, M_PI), 0, va11.swizzle<1, 0>()); } if (dev.has(sycl::aspect::fp16)) { - TEST(sycl::radians, sycl::half, 3, EXPECTED(sycl::half, M_PI, M_PI, M_PI), - 0.002, va12); TEST(sycl::radians, sycl::half, 2, EXPECTED(sycl::half, M_PI, M_PI), 0.002, va12.swizzle<1, 0>()); } - // sycl::step - TEST(sycl::step, float, 2, EXPECTED(float, 1.0f, 1.0f), 0, va1, va3); + // sycl::step swizzled TEST(sycl::step, float, 2, EXPECTED(float, 0.0f, 0.0f), 0, va16.swizzle<1, 0>(), va3); TEST(sycl::step, float, 2, EXPECTED(float, 1.0f, 1.0f), 0, va1, @@ -169,7 +233,6 @@ int main() { TEST(sycl::step, float, 2, EXPECTED(float, 1.0f, 1.0f), 0, va16.swizzle<1, 0>(), va16.swizzle<1, 0>()); if (dev.has(sycl::aspect::fp64)) { - TEST(sycl::step, double, 2, EXPECTED(double, 1.0, 1.0), 0, va4, va9); TEST(sycl::step, double, 2, EXPECTED(double, 0.0, 0.0), 0, va17.swizzle<1, 0>(), va9); TEST(sycl::step, double, 2, EXPECTED(double, 1.0, 1.0), 0, va4, @@ -178,8 +241,6 @@ int main() { va17.swizzle<1, 0>(), va17.swizzle<1, 0>()); } if (dev.has(sycl::aspect::fp16)) { - TEST(sycl::step, sycl::half, 3, EXPECTED(sycl::half, 1.0, 0.0, 1.0), 0, - va12, va13); TEST(sycl::step, sycl::half, 3, EXPECTED(sycl::half, 1.0, 0.0, 1.0), 0, va12.swizzle<0, 1, 2>(), va13); TEST(sycl::step, sycl::half, 3, EXPECTED(sycl::half, 1.0, 0.0, 1.0), 0, @@ -187,17 +248,13 @@ int main() { TEST(sycl::step, sycl::half, 3, EXPECTED(sycl::half, 1.0, 0.0, 1.0), 0, va12.swizzle<0, 1, 2>(), va13.swizzle<0, 1, 2>()); } - TEST(sycl::step, float, 2, EXPECTED(float, 1.0f, 0.0f), 0, 2.5f, va3); TEST(sycl::step, float, 2, EXPECTED(float, 0.0f, 1.0f), 0, 2.5f, va3.swizzle<1, 0>()); if (dev.has(sycl::aspect::fp64)) { - TEST(sycl::step, double, 2, EXPECTED(double, 0.0f, 1.0f), 0, 6.0f, va9); TEST(sycl::step, double, 2, EXPECTED(double, 1.0f, 0.0f), 0, 6.0f, va9.swizzle<1, 0>()); } - // sycl::smoothstep - TEST(sycl::smoothstep, float, 2, EXPECTED(float, 1.0f, 1.0f), 0, va8, va1, - va2); + // sycl::smoothstep swizzled TEST(sycl::smoothstep, float, 2, EXPECTED(float, 1.0f, 1.0f), 0, va8.swizzle<0, 1>(), va1, va2); TEST(sycl::smoothstep, float, 2, EXPECTED(float, 1.0f, 1.0f), 0, va8, @@ -213,8 +270,6 @@ int main() { TEST(sycl::smoothstep, float, 2, EXPECTED(float, 1.0f, 1.0f), 0, va8.swizzle<0, 1>(), va1.swizzle<0, 1>(), va2.swizzle<0, 1>()); if (dev.has(sycl::aspect::fp64)) { - TEST(sycl::smoothstep, double, 2, EXPECTED(double, 1.0, 1.0f), 0.00000001, - va4, va9, va9); TEST(sycl::smoothstep, double, 2, EXPECTED(double, 1.0, 1.0f), 0.00000001, va4.swizzle<0, 1>(), va9, va9); TEST(sycl::smoothstep, double, 2, EXPECTED(double, 1.0, 1.0f), 0.00000001, @@ -231,8 +286,6 @@ int main() { va4.swizzle<0, 1>(), va9.swizzle<0, 1>(), va9.swizzle<0, 1>()); } if (dev.has(sycl::aspect::fp16)) { - TEST(sycl::smoothstep, sycl::half, 3, EXPECTED(sycl::half, 1.0, 1.0, 1.0), - 0, va7, va12, va13); TEST(sycl::smoothstep, sycl::half, 3, EXPECTED(sycl::half, 1.0, 1.0, 1.0), 0, va7.swizzle<0, 1, 2>(), va12, va13); TEST(sycl::smoothstep, sycl::half, 3, EXPECTED(sycl::half, 1.0, 1.0, 1.0), @@ -249,8 +302,6 @@ int main() { 0, va7.swizzle<0, 1, 2>(), va12.swizzle<0, 1, 2>(), va13.swizzle<0, 1, 2>()); } - TEST(sycl::smoothstep, float, 2, EXPECTED(float, 0.0553936f, 0.0f), 0.0000001, - 2.5f, 6.0f, va3); TEST(sycl::smoothstep, float, 2, EXPECTED(float, 0.0f, 0.0553936f), 0.0000001, 2.5f, 6.0f, va3.swizzle<1, 0>()); if (dev.has(sycl::aspect::fp64)) { @@ -259,21 +310,18 @@ int main() { TEST(sycl::smoothstep, double, 2, EXPECTED(double, 1.0f, 0.0f), 0, 6.0f, 8.0f, va9.swizzle<1, 0>()); } - // sign - TEST(sycl::sign, float, 2, EXPECTED(float, +0.0f, -1.0f), 0, va14); + // sign swizzled TEST(sycl::sign, float, 2, EXPECTED(float, -1.0f, +0.0f), 0, va14.swizzle<1, 0>()); if (dev.has(sycl::aspect::fp64)) { - TEST(sycl::sign, double, 2, EXPECTED(double, -0.0, 1.0), 0, va15); TEST(sycl::sign, double, 2, EXPECTED(double, 1.0, -0.0), 0, va15.swizzle<1, 0>()); } if (dev.has(sycl::aspect::fp16)) { - TEST(sycl::sign, sycl::half, 3, EXPECTED(sycl::half, 1.0, 1.0, 1.0), 0, - va12); TEST(sycl::sign, sycl::half, 3, EXPECTED(sycl::half, 1.0, 1.0, 1.0), 0, va12.swizzle<2, 1, 0>()); } +#endif // __INTEL_PREVIEW_BREAKING_CHANGES return 0; } diff --git a/sycl/test-e2e/Basic/built-ins/vec_geometric.cpp b/sycl/test-e2e/Basic/built-ins/vec_geometric.cpp index 754428be02191..be3c3942934ee 100644 --- a/sycl/test-e2e/Basic/built-ins/vec_geometric.cpp +++ b/sycl/test-e2e/Basic/built-ins/vec_geometric.cpp @@ -1,5 +1,7 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out +// RUN: %if preview-breaking-changes-supported %{%{build} -o -fpreview %t_preview.out%} +// RUN: %if preview-breaking-changes-supported %{%{run} %t_preview.out%} // RUN: %if preview-breaking-changes-supported %{ %clangxx -fsycl -fpreview-breaking-changes %s -o %t2.out %} // RUN: %if preview-breaking-changes-supported %{ %{run} %t2.out %} @@ -67,70 +69,38 @@ int main() { VFloatD3_2); TEST(sycl::cross, float, 4, EXPECTED(float, -1.f, -4.f, 3.f, 0.f), 0, VFloatD4, VFloatD4_2); - TEST(sycl::cross, float, 3, EXPECTED(float, -1.f, -4.f, 3.f), 0, - VFloatD4.swizzle<0, 1, 2>(), VFloatD3_2); - TEST(sycl::cross, float, 3, EXPECTED(float, -1.f, -4.f, 3.f), 0, VFloatD3, - VFloatD4_2.swizzle<0, 1, 2>()); - TEST(sycl::cross, float, 3, EXPECTED(float, -1.f, -4.f, 3.f), 0, - VFloatD4.swizzle<0, 1, 2>(), VFloatD4_2.swizzle<0, 1, 2>()); if (Dev.has(sycl::aspect::fp64)) { TEST(sycl::cross, double, 3, EXPECTED(double, -1.f, -4.f, 3.f), 0, VDoubleD3, VDoubleD3_2); TEST(sycl::cross, double, 4, EXPECTED(double, -1.f, -4.f, 3.f, 0.f), 0, VDoubleD4, VDoubleD4_2); - TEST(sycl::cross, double, 3, EXPECTED(double, -1.f, -4.f, 3.f), 0, - VDoubleD3, VDoubleD4_2.swizzle<0, 1, 2>()); - TEST(sycl::cross, double, 3, EXPECTED(double, -1.f, -4.f, 3.f), 0, - VDoubleD4.swizzle<0, 1, 2>(), VDoubleD3_2); - TEST(sycl::cross, double, 3, EXPECTED(double, -1.f, -4.f, 3.f), 0, - VDoubleD4.swizzle<0, 1, 2>(), VDoubleD4_2.swizzle<0, 1, 2>()); } TEST2(sycl::dot, float, 13.f, 0, VFloatD2, VFloatD2_2); TEST2(sycl::dot, float, 32.f, 0, VFloatD3, VFloatD3_2); TEST2(sycl::dot, float, 48.f, 0, VFloatD4, VFloatD4_2); - TEST2(sycl::dot, float, 32.f, 0, VFloatD4.swizzle<0, 1, 2>(), VFloatD3_2); - TEST2(sycl::dot, float, 32.f, 0, VFloatD3, VFloatD4_2.swizzle<0, 1, 2>()); - TEST2(sycl::dot, float, 32.f, 0, VFloatD4.swizzle<0, 1, 2>(), - VFloatD4_2.swizzle<0, 1, 2>()); if (Dev.has(sycl::aspect::fp64)) { TEST2(sycl::dot, double, 13, 0, VDoubleD2, VDoubleD2_2); TEST2(sycl::dot, double, 32, 0, VDoubleD3, VDoubleD3_2); TEST2(sycl::dot, double, 48, 0, VDoubleD4, VDoubleD4_2); - TEST2(sycl::dot, double, 32, 0, VDoubleD4.swizzle<0, 1, 2>(), VDoubleD3_2); - TEST2(sycl::dot, double, 32, 0, VDoubleD3, VDoubleD4_2.swizzle<0, 1, 2>()); - TEST2(sycl::dot, double, 32, 0, VDoubleD4.swizzle<0, 1, 2>(), - VDoubleD4_2.swizzle<0, 1, 2>()); } TEST2(sycl::length, float, 2.236068f, 1e-6, VFloatD2); TEST2(sycl::length, float, 3.741657f, 1e-6, VFloatD3); TEST2(sycl::length, float, 5.477225f, 1e-6, VFloatD4); - TEST2(sycl::length, float, 3.741657f, 1e-6, VFloatD4.swizzle<0, 1, 2>()); if (Dev.has(sycl::aspect::fp64)) { TEST2(sycl::length, double, 2.236068, 1e-6, VDoubleD2); TEST2(sycl::length, double, 3.741657, 1e-6, VDoubleD3); TEST2(sycl::length, double, 5.477225, 1e-6, VDoubleD4); - TEST2(sycl::length, double, 3.741657, 1e-6, VDoubleD4.swizzle<0, 1, 2>()); } TEST2(sycl::distance, float, 3.605551f, 1e-6, VFloatD2, VFloatD2_2); TEST2(sycl::distance, float, 5.f, 0, VFloatD3, VFloatD3_2); TEST2(sycl::distance, float, 5.f, 0, VFloatD4, VFloatD4_2); - TEST2(sycl::distance, float, 5.f, 0, VFloatD4.swizzle<0, 1, 2>(), VFloatD3_2); - TEST2(sycl::distance, float, 5.f, 0, VFloatD3, VFloatD4_2.swizzle<0, 1, 2>()); - TEST2(sycl::distance, float, 5.f, 0, VFloatD4.swizzle<0, 1, 2>(), - VFloatD4_2.swizzle<0, 1, 2>()); if (Dev.has(sycl::aspect::fp64)) { TEST2(sycl::distance, double, 3.605551, 1e-6, VDoubleD2, VDoubleD2_2); TEST2(sycl::distance, double, 5.0, 0, VDoubleD3, VDoubleD3_2); TEST2(sycl::distance, double, 5.0, 0, VDoubleD4, VDoubleD4_2); - TEST2(sycl::distance, double, 5.0, 0, VDoubleD4.swizzle<0, 1, 2>(), - VDoubleD3_2); - TEST2(sycl::distance, double, 5.0, 0, VDoubleD3, - VDoubleD4_2.swizzle<0, 1, 2>()); - TEST2(sycl::distance, double, 5.0, 0, VDoubleD4.swizzle<0, 1, 2>(), - VDoubleD4_2.swizzle<0, 1, 2>()); } TEST(sycl::normalize, float, 2, EXPECTED(float, 0.447213f, 0.894427f), 1e-6, @@ -140,8 +110,6 @@ int main() { TEST(sycl::normalize, float, 4, EXPECTED(float, 0.182574f, 0.365148f, 0.547723f, 0.730297f), 1e-6, VFloatD4); - TEST(sycl::normalize, float, 3, EXPECTED(float, 0.267261, 0.534522, 0.801784), - 1e-6, VFloatD4.swizzle<0, 1, 2>()); if (Dev.has(sycl::aspect::fp64)) { TEST(sycl::normalize, double, 2, EXPECTED(double, 0.447213, 0.894427), 1e-6, VDoubleD2); @@ -150,25 +118,15 @@ int main() { TEST(sycl::normalize, double, 4, EXPECTED(double, 0.182574, 0.365148, 0.547723, 0.730297), 1e-6, VDoubleD4); - TEST(sycl::normalize, double, 3, - EXPECTED(double, 0.267261, 0.534522, 0.801784), 1e-6, - VDoubleD4.swizzle<0, 1, 2>()); } TEST2(sycl::fast_distance, float, 3.605551f, 1e-6, VFloatD2, VFloatD2_2); TEST2(sycl::fast_distance, float, 5.f, 0, VFloatD3, VFloatD3_2); TEST2(sycl::fast_distance, float, 5.f, 0, VFloatD4, VFloatD4_2); - TEST2(sycl::fast_distance, float, 5.f, 0, VFloatD4.swizzle<0, 1, 2>(), - VFloatD3_2); - TEST2(sycl::fast_distance, float, 5.f, 0, VFloatD3, - VFloatD4_2.swizzle<0, 1, 2>()); - TEST2(sycl::fast_distance, float, 5.f, 0, VFloatD4.swizzle<0, 1, 2>(), - VFloatD4_2.swizzle<0, 1, 2>()); TEST2(sycl::fast_length, float, 2.236068f, 1e-6, VFloatD2); TEST2(sycl::fast_length, float, 3.741657f, 1e-6, VFloatD3); TEST2(sycl::fast_length, float, 5.477225f, 1e-6, VFloatD4); - TEST2(sycl::fast_length, float, 3.741657f, 1e-6, VFloatD4.swizzle<0, 1, 2>()); TEST(sycl::fast_normalize, float, 2, EXPECTED(float, 0.447213f, 0.894427f), 1e-3, VFloatD2); @@ -177,9 +135,73 @@ int main() { TEST(sycl::fast_normalize, float, 4, EXPECTED(float, 0.182574f, 0.365148f, 0.547723f, 0.730297f), 1e-3, VFloatD4); + +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES + TEST(sycl::cross, float, 3, EXPECTED(float, -1.f, -4.f, 3.f), 0, + VFloatD4.swizzle<0, 1, 2>(), VFloatD3_2); + TEST(sycl::cross, float, 3, EXPECTED(float, -1.f, -4.f, 3.f), 0, VFloatD3, + VFloatD4_2.swizzle<0, 1, 2>()); + TEST(sycl::cross, float, 3, EXPECTED(float, -1.f, -4.f, 3.f), 0, + VFloatD4.swizzle<0, 1, 2>(), VFloatD4_2.swizzle<0, 1, 2>()); + if (Dev.has(sycl::aspect::fp64)) { + TEST(sycl::cross, double, 3, EXPECTED(double, -1.f, -4.f, 3.f), 0, + VDoubleD3, VDoubleD4_2.swizzle<0, 1, 2>()); + TEST(sycl::cross, double, 3, EXPECTED(double, -1.f, -4.f, 3.f), 0, + VDoubleD4.swizzle<0, 1, 2>(), VDoubleD3_2); + TEST(sycl::cross, double, 3, EXPECTED(double, -1.f, -4.f, 3.f), 0, + VDoubleD4.swizzle<0, 1, 2>(), VDoubleD4_2.swizzle<0, 1, 2>()); + } + + TEST2(sycl::dot, float, 32.f, 0, VFloatD4.swizzle<0, 1, 2>(), VFloatD3_2); + TEST2(sycl::dot, float, 32.f, 0, VFloatD3, VFloatD4_2.swizzle<0, 1, 2>()); + TEST2(sycl::dot, float, 32.f, 0, VFloatD4.swizzle<0, 1, 2>(), + VFloatD4_2.swizzle<0, 1, 2>()); + if (Dev.has(sycl::aspect::fp64)) { + TEST2(sycl::dot, double, 32, 0, VDoubleD4.swizzle<0, 1, 2>(), VDoubleD3_2); + TEST2(sycl::dot, double, 32, 0, VDoubleD3, VDoubleD4_2.swizzle<0, 1, 2>()); + TEST2(sycl::dot, double, 32, 0, VDoubleD4.swizzle<0, 1, 2>(), + VDoubleD4_2.swizzle<0, 1, 2>()); + } + + TEST2(sycl::length, float, 3.741657f, 1e-6, VFloatD4.swizzle<0, 1, 2>()); + if (Dev.has(sycl::aspect::fp64)) { + TEST2(sycl::length, double, 3.741657, 1e-6, VDoubleD4.swizzle<0, 1, 2>()); + } + + TEST2(sycl::distance, float, 5.f, 0, VFloatD4.swizzle<0, 1, 2>(), VFloatD3_2); + TEST2(sycl::distance, float, 5.f, 0, VFloatD3, VFloatD4_2.swizzle<0, 1, 2>()); + TEST2(sycl::distance, float, 5.f, 0, VFloatD4.swizzle<0, 1, 2>(), + VFloatD4_2.swizzle<0, 1, 2>()); + if (Dev.has(sycl::aspect::fp64)) { + TEST2(sycl::distance, double, 5.0, 0, VDoubleD4.swizzle<0, 1, 2>(), + VDoubleD3_2); + TEST2(sycl::distance, double, 5.0, 0, VDoubleD3, + VDoubleD4_2.swizzle<0, 1, 2>()); + TEST2(sycl::distance, double, 5.0, 0, VDoubleD4.swizzle<0, 1, 2>(), + VDoubleD4_2.swizzle<0, 1, 2>()); + } + + TEST(sycl::normalize, float, 3, EXPECTED(float, 0.267261, 0.534522, 0.801784), + 1e-6, VFloatD4.swizzle<0, 1, 2>()); + if (Dev.has(sycl::aspect::fp64)) { + TEST(sycl::normalize, double, 3, + EXPECTED(double, 0.267261, 0.534522, 0.801784), 1e-6, + VDoubleD4.swizzle<0, 1, 2>()); + } + + TEST2(sycl::fast_distance, float, 5.f, 0, VFloatD4.swizzle<0, 1, 2>(), + VFloatD3_2); + TEST2(sycl::fast_distance, float, 5.f, 0, VFloatD3, + VFloatD4_2.swizzle<0, 1, 2>()); + TEST2(sycl::fast_distance, float, 5.f, 0, VFloatD4.swizzle<0, 1, 2>(), + VFloatD4_2.swizzle<0, 1, 2>()); + + TEST2(sycl::fast_length, float, 3.741657f, 1e-6, VFloatD4.swizzle<0, 1, 2>()); + TEST(sycl::fast_normalize, float, 3, EXPECTED(float, 0.267261f, 0.534522f, 0.801784f), 1e-3, VFloatD4.swizzle<0, 1, 2>()); +#endif // __INTEL_PREVIEW_BREAKING_CHANGES return 0; } diff --git a/sycl/test-e2e/Basic/built-ins/vec_math.cpp b/sycl/test-e2e/Basic/built-ins/vec_math.cpp index 50ffa97ff7f66..aa10e6a86b870 100644 --- a/sycl/test-e2e/Basic/built-ins/vec_math.cpp +++ b/sycl/test-e2e/Basic/built-ins/vec_math.cpp @@ -2,6 +2,8 @@ // RUN: %{build} %{mathflags} -o %t.out // RUN: %{run} %t.out +// RUN: %if preview-breaking-changes-supported %{%{build} %{mathflags} -o -fpreview %t_preview.out%} +// RUN: %if preview-breaking-changes-supported %{%{run} %t_preview.out%} // RUN: %if preview-breaking-changes-supported %{ %clangxx -fsycl -fpreview-breaking-changes %{mathflags} %s -o %t2.out %} // RUN: %if preview-breaking-changes-supported %{ %{run} %t2.out %} @@ -97,30 +99,51 @@ int main() { sycl::vec va13{1.4f, 4.2f}; TEST(sycl::fabs, float, 3, EXPECTED(float, 180, 180, 180), 0, va5); - TEST(sycl::fabs, float, 2, EXPECTED(float, 180, 180), 0, va5.swizzle<0, 1>()); TEST(sycl::ilogb, int, 3, EXPECTED(int, 7, 7, 7), 0, va3); - TEST(sycl::ilogb, int, 2, EXPECTED(int, 7, 7), 0, va3.swizzle<0, 1>()); TEST(sycl::fmax, float, 2, EXPECTED(float, 3.0f, 2.0f), 0, va1, va2); + TEST(sycl::fmin, float, 2, EXPECTED(float, 1.0f, 2.0f), 0, va1, 5.0f); + TEST(sycl::ldexp, float, 3, EXPECTED(float, 360, 360, 360), 0, va3, va4); + TEST(sycl::rootn, float, 3, EXPECTED(float, 180, 180, 180), 0.1, va3, va4); + TEST2(sycl::fract, float, float, 3, EXPECTED(float, 0.4f, 0.2f, 0.3f), + EXPECTED(float, 1, 4, 5), 0.0001, va6); + TEST2(sycl::modf, float, float, 3, EXPECTED(float, 0.4f, 0.2f, 0.3f), + EXPECTED(float, 1, 4, 5), 0.0001, va6); + TEST2(sycl::sincos, float, float, 3, + EXPECTED(float, 0.98545f, -0.871576f, -0.832267f), + EXPECTED(float, 0.169967, -0.490261, 0.554375), 0.0001, va6); + TEST2(sycl::frexp, float, int, 3, EXPECTED(float, 0.7f, 0.525f, 0.6625f), + EXPECTED(int, 1, 3, 3), 0.0001, va6); + TEST2(sycl::lgamma_r, float, int, 3, + EXPECTED(float, -0.119613f, 2.04856f, 3.63964f), EXPECTED(int, 1, 1, 1), + 0.0001, va6); + TEST2(sycl::remquo, float, int, 3, EXPECTED(float, 1.4f, 4.2f, 5.3f), + EXPECTED(int, 0, 0, 0), 0.0001, va6, va3); + TEST3(sycl::nan, float, 3, va7); + if (deviceQueue.get_device().has(sycl::aspect::fp64)) { + TEST3(sycl::nan, double, 3, va8); + } + TEST(sycl::half_precision::exp10, float, 2, EXPECTED(float, 10, 100), 0.1, + va1); + +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES + TEST(sycl::fabs, float, 2, EXPECTED(float, 180, 180), 0, va5.swizzle<0, 1>()); + TEST(sycl::ilogb, int, 2, EXPECTED(int, 7, 7), 0, va3.swizzle<0, 1>()); TEST(sycl::fmax, float, 2, EXPECTED(float, 3.0f, 2.0f), 0, va9.swizzle<0, 1>(), va2); TEST(sycl::fmax, float, 2, EXPECTED(float, 3.0f, 2.0f), 0, va1, va10.swizzle<0, 1>()); TEST(sycl::fmax, float, 2, EXPECTED(float, 3.0f, 2.0f), 0, va9.swizzle<0, 1>(), va10.swizzle<0, 1>()); - TEST(sycl::fmax, float, 2, EXPECTED(float, 5.0f, 5.0f), 0, va1, 5.0f); TEST(sycl::fmax, float, 2, EXPECTED(float, 5.0f, 5.0f), 0, va9.swizzle<0, 1>(), 5.0f); - TEST(sycl::fmin, float, 2, EXPECTED(float, 1.0f, 2.0f), 0, va1, va2); TEST(sycl::fmin, float, 2, EXPECTED(float, 1.0f, 2.0f), 0, va9.swizzle<0, 1>(), va2); TEST(sycl::fmin, float, 2, EXPECTED(float, 1.0f, 2.0f), 0, va1, va10.swizzle<0, 1>()); TEST(sycl::fmin, float, 2, EXPECTED(float, 1.0f, 2.0f), 0, va9.swizzle<0, 1>(), va10.swizzle<0, 1>()); - TEST(sycl::fmin, float, 2, EXPECTED(float, 1.0f, 2.0f), 0, va1, 5.0f); TEST(sycl::fmin, float, 2, EXPECTED(float, 1.0f, 2.0f), 0, va9.swizzle<0, 1>(), 5.0f); - TEST(sycl::ldexp, float, 3, EXPECTED(float, 360, 360, 360), 0, va3, va4); TEST(sycl::ldexp, float, 2, EXPECTED(float, 360, 360), 0, va3.swizzle<0, 1>(), va12); TEST(sycl::ldexp, float, 2, EXPECTED(float, 360, 360), 0, va11, @@ -144,46 +167,29 @@ int main() { va4.swizzle<0, 1>()); TEST(sycl::rootn, float, 2, EXPECTED(float, 180, 180), 0.1, va3.swizzle<0, 1>(), va4.swizzle<0, 1>()); - TEST2(sycl::fract, float, float, 3, EXPECTED(float, 0.4f, 0.2f, 0.3f), - EXPECTED(float, 1, 4, 5), 0.0001, va6); TEST2(sycl::fract, float, float, 2, EXPECTED(float, 0.4f, 0.2f), EXPECTED(float, 1, 4), 0.0001, va6.swizzle<0, 1>()); - TEST2(sycl::modf, float, float, 3, EXPECTED(float, 0.4f, 0.2f, 0.3f), - EXPECTED(float, 1, 4, 5), 0.0001, va6); TEST2(sycl::modf, float, float, 2, EXPECTED(float, 0.4f, 0.2f), EXPECTED(float, 1, 4), 0.0001, va6.swizzle<0, 1>()); - TEST2(sycl::sincos, float, float, 3, - EXPECTED(float, 0.98545f, -0.871576f, -0.832267f), - EXPECTED(float, 0.169967, -0.490261, 0.554375), 0.0001, va6); TEST2(sycl::sincos, float, float, 2, EXPECTED(float, 0.98545f, -0.871576f), EXPECTED(float, 0.169967, -0.490261), 0.0001, va6.swizzle<0, 1>()); - TEST2(sycl::frexp, float, int, 3, EXPECTED(float, 0.7f, 0.525f, 0.6625f), - EXPECTED(int, 1, 3, 3), 0.0001, va6); TEST2(sycl::frexp, float, int, 2, EXPECTED(float, 0.7f, 0.525f), EXPECTED(int, 1, 3), 0.0001, va6.swizzle<0, 1>()); - TEST2(sycl::lgamma_r, float, int, 3, - EXPECTED(float, -0.119613f, 2.04856f, 3.63964f), EXPECTED(int, 1, 1, 1), - 0.0001, va6); TEST2(sycl::lgamma_r, float, int, 2, EXPECTED(float, -0.119613f, 2.04856f), EXPECTED(int, 1, 1), 0.0001, va6.swizzle<0, 1>()); - TEST2(sycl::remquo, float, int, 3, EXPECTED(float, 1.4f, 4.2f, 5.3f), - EXPECTED(int, 0, 0, 0), 0.0001, va6, va3); TEST2(sycl::remquo, float, int, 2, EXPECTED(float, 1.4f, 4.2f), EXPECTED(int, 0, 0), 0.0001, va6.swizzle<0, 1>(), va11); TEST2(sycl::remquo, float, int, 2, EXPECTED(float, 1.4f, 4.2f), EXPECTED(int, 0, 0), 0.0001, va13, va3.swizzle<0, 1>()); TEST2(sycl::remquo, float, int, 2, EXPECTED(float, 1.4f, 4.2f), EXPECTED(int, 0, 0), 0.0001, va6.swizzle<0, 1>(), va3.swizzle<0, 1>()); - TEST3(sycl::nan, float, 3, va7); TEST3(sycl::nan, float, 2, va7.swizzle<0, 1>()); if (deviceQueue.get_device().has(sycl::aspect::fp64)) { - TEST3(sycl::nan, double, 3, va8); TEST3(sycl::nan, double, 2, va8.swizzle<0, 1>()); } - TEST(sycl::half_precision::exp10, float, 2, EXPECTED(float, 10, 100), 0.1, - va1); TEST(sycl::half_precision::exp10, float, 2, EXPECTED(float, 10, 100), 0.1, va9.swizzle<0, 1>()); +#endif // __INTEL_PREVIEW_BREAKING_CHANGES return 0; } diff --git a/sycl/test-e2e/Basic/built-ins/vec_relational.cpp b/sycl/test-e2e/Basic/built-ins/vec_relational.cpp index 05309e44fca40..dec34bdb60731 100644 --- a/sycl/test-e2e/Basic/built-ins/vec_relational.cpp +++ b/sycl/test-e2e/Basic/built-ins/vec_relational.cpp @@ -1,5 +1,7 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out +// RUN: %if preview-breaking-changes-supported %{%{build} -o -fpreview %t_preview.out%} +// RUN: %if preview-breaking-changes-supported %{%{run} %t_preview.out%} // RUN: %if preview-breaking-changes-supported %{ %clangxx -fsycl -fpreview-breaking-changes %s -o %t2.out %} // RUN: %if preview-breaking-changes-supported %{ %{run} %t2.out %} @@ -72,86 +74,89 @@ int main() { sycl::vec c2(1, 0); TEST(sycl::isequal, int32_t, EXPECTED(int32_t, 1, 1), 2, va1, va2); + TEST(sycl::isnotequal, int32_t, EXPECTED(int32_t, 0, 0), 2, va1, va2); + TEST(sycl::isgreater, int32_t, EXPECTED(int32_t, 0, 1), 2, va1, va3); + TEST(sycl::isgreaterequal, int32_t, EXPECTED(int32_t, 0, 1), 2, va1, va4); + TEST(sycl::isless, int32_t, EXPECTED(int32_t, 0, 1), 2, va3, va1); + TEST(sycl::islessequal, int32_t, EXPECTED(int32_t, 0, 1), 2, va4, va1); + TEST(sycl::islessgreater, int32_t, EXPECTED(int32_t, 0, 0), 2, va1, va2); + TEST(sycl::isfinite, int32_t, EXPECTED(int32_t, 1, 1), 2, va1); + TEST(sycl::isinf, int32_t, EXPECTED(int32_t, 0, 0), 2, va1); + TEST(sycl::isnan, int32_t, EXPECTED(int32_t, 0, 0), 2, va1); + TEST(sycl::isnormal, int32_t, EXPECTED(int32_t, 1, 1), 2, va1); + TEST(sycl::isordered, int32_t, EXPECTED(int32_t, 1, 1), 2, va1, va2); + TEST(sycl::isunordered, int32_t, EXPECTED(int32_t, 0, 0), 2, va1, va2); + TEST(sycl::signbit, int32_t, EXPECTED(int32_t, 0, 0), 2, va1); + TEST2(sycl::all, int, EXPECTED(int32_t, 0), 3, va7); + TEST2(sycl::any, int, EXPECTED(int32_t, 0), 3, va7); + TEST(sycl::bitselect, float, EXPECTED(float, 1.0, 1.0), 2, va8, va9, va10); + TEST(sycl::select, float, EXPECTED(float, 1.0, 2.0, 8.0), 3, va5, va6, c1); + +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES TEST(sycl::isequal, int32_t, EXPECTED(int32_t, 1, 1), 2, va11.swizzle<0, 1>(), va2); TEST(sycl::isequal, int32_t, EXPECTED(int32_t, 1, 1), 2, va1, va11.swizzle<0, 1>()); TEST(sycl::isequal, int32_t, EXPECTED(int32_t, 1, 1), 2, va11.swizzle<0, 1>(), va11.swizzle<0, 1>()); - TEST(sycl::isnotequal, int32_t, EXPECTED(int32_t, 0, 0), 2, va1, va2); TEST(sycl::isnotequal, int32_t, EXPECTED(int32_t, 0, 0), 2, va11.swizzle<0, 1>(), va2); TEST(sycl::isnotequal, int32_t, EXPECTED(int32_t, 0, 0), 2, va1, va11.swizzle<0, 1>()); TEST(sycl::isnotequal, int32_t, EXPECTED(int32_t, 0, 0), 2, va11.swizzle<0, 1>(), va11.swizzle<0, 1>()); - TEST(sycl::isgreater, int32_t, EXPECTED(int32_t, 0, 1), 2, va1, va3); TEST(sycl::isgreater, int32_t, EXPECTED(int32_t, 0, 1), 2, va11.swizzle<0, 1>(), va3); TEST(sycl::isgreater, int32_t, EXPECTED(int32_t, 0, 1), 2, va1, va12.swizzle<0, 1>()); TEST(sycl::isgreater, int32_t, EXPECTED(int32_t, 0, 1), 2, va11.swizzle<0, 1>(), va12.swizzle<0, 1>()); - TEST(sycl::isgreaterequal, int32_t, EXPECTED(int32_t, 0, 1), 2, va1, va4); TEST(sycl::isgreaterequal, int32_t, EXPECTED(int32_t, 0, 1), 2, va11.swizzle<0, 1>(), va4); TEST(sycl::isgreaterequal, int32_t, EXPECTED(int32_t, 0, 1), 2, va1, va13.swizzle<0, 1>()); TEST(sycl::isgreaterequal, int32_t, EXPECTED(int32_t, 0, 1), 2, va11.swizzle<0, 1>(), va13.swizzle<0, 1>()); - TEST(sycl::isless, int32_t, EXPECTED(int32_t, 0, 1), 2, va3, va1); TEST(sycl::isless, int32_t, EXPECTED(int32_t, 0, 1), 2, va3, va11.swizzle<0, 1>()); TEST(sycl::isless, int32_t, EXPECTED(int32_t, 0, 1), 2, va12.swizzle<0, 1>(), va1); TEST(sycl::isless, int32_t, EXPECTED(int32_t, 0, 1), 2, va12.swizzle<0, 1>(), va11.swizzle<0, 1>()); - TEST(sycl::islessequal, int32_t, EXPECTED(int32_t, 0, 1), 2, va4, va1); TEST(sycl::islessequal, int32_t, EXPECTED(int32_t, 0, 1), 2, va13.swizzle<0, 1>(), va1); TEST(sycl::islessequal, int32_t, EXPECTED(int32_t, 0, 1), 2, va4, va11.swizzle<0, 1>()); TEST(sycl::islessequal, int32_t, EXPECTED(int32_t, 0, 1), 2, va13.swizzle<0, 1>(), va11.swizzle<0, 1>()); - TEST(sycl::islessgreater, int32_t, EXPECTED(int32_t, 0, 0), 2, va1, va2); TEST(sycl::islessgreater, int32_t, EXPECTED(int32_t, 0, 0), 2, va11.swizzle<0, 1>(), va2); TEST(sycl::islessgreater, int32_t, EXPECTED(int32_t, 0, 0), 2, va1, va11.swizzle<0, 1>()); TEST(sycl::islessgreater, int32_t, EXPECTED(int32_t, 0, 0), 2, va11.swizzle<0, 1>(), va11.swizzle<0, 1>()); - TEST(sycl::isfinite, int32_t, EXPECTED(int32_t, 1, 1), 2, va1); TEST(sycl::isfinite, int32_t, EXPECTED(int32_t, 1, 1), 2, va11.swizzle<0, 1>()); - TEST(sycl::isinf, int32_t, EXPECTED(int32_t, 0, 0), 2, va1); TEST(sycl::isinf, int32_t, EXPECTED(int32_t, 0, 0), 2, va11.swizzle<0, 1>()); - TEST(sycl::isnan, int32_t, EXPECTED(int32_t, 0, 0), 2, va1); TEST(sycl::isnan, int32_t, EXPECTED(int32_t, 0, 0), 2, va11.swizzle<0, 1>()); - TEST(sycl::isnormal, int32_t, EXPECTED(int32_t, 1, 1), 2, va1); TEST(sycl::isnormal, int32_t, EXPECTED(int32_t, 1, 1), 2, va11.swizzle<0, 1>()); - TEST(sycl::isordered, int32_t, EXPECTED(int32_t, 1, 1), 2, va1, va2); TEST(sycl::isordered, int32_t, EXPECTED(int32_t, 1, 1), 2, va11.swizzle<0, 1>(), va2); TEST(sycl::isordered, int32_t, EXPECTED(int32_t, 1, 1), 2, va1, va11.swizzle<0, 1>()); TEST(sycl::isordered, int32_t, EXPECTED(int32_t, 1, 1), 2, va11.swizzle<0, 1>(), va11.swizzle<0, 1>()); - TEST(sycl::isunordered, int32_t, EXPECTED(int32_t, 0, 0), 2, va1, va2); TEST(sycl::isunordered, int32_t, EXPECTED(int32_t, 0, 0), 2, va11.swizzle<0, 1>(), va2); TEST(sycl::isunordered, int32_t, EXPECTED(int32_t, 0, 0), 2, va1, va11.swizzle<0, 1>()); TEST(sycl::isunordered, int32_t, EXPECTED(int32_t, 0, 0), 2, va11.swizzle<0, 1>(), va11.swizzle<0, 1>()); - TEST(sycl::signbit, int32_t, EXPECTED(int32_t, 0, 0), 2, va1); TEST(sycl::signbit, int32_t, EXPECTED(int32_t, 0, 0), 2, va11.swizzle<0, 1>()); - TEST2(sycl::all, int, EXPECTED(int32_t, 0), 3, va7); TEST2(sycl::all, int, EXPECTED(int32_t, 0), 3, va14.swizzle<0, 1, 2>()); - TEST2(sycl::any, int, EXPECTED(int32_t, 0), 3, va7); TEST2(sycl::any, int, EXPECTED(int32_t, 0), 3, va14.swizzle<0, 1, 2>()); - TEST(sycl::bitselect, float, EXPECTED(float, 1.0, 1.0), 2, va8, va9, va10); TEST(sycl::bitselect, float, EXPECTED(float, 1.0, 1.0), 2, va16.swizzle<0, 1>(), va9, va10); TEST(sycl::bitselect, float, EXPECTED(float, 1.0, 1.0), 2, va8, @@ -166,7 +171,6 @@ int main() { va17.swizzle<0, 1>(), va18.swizzle<0, 1>()); TEST(sycl::bitselect, float, EXPECTED(float, 1.0, 1.0), 2, va16.swizzle<0, 1>(), va17.swizzle<0, 1>(), va18.swizzle<0, 1>()); - TEST(sycl::select, float, EXPECTED(float, 1.0, 2.0, 8.0), 3, va5, va6, c1); TEST(sycl::select, float, EXPECTED(float, 1.0, 2.0), 2, va5.swizzle<0, 1>(), va15, c2); TEST(sycl::select, float, EXPECTED(float, 1.0, 2.0), 2, va4, @@ -183,6 +187,7 @@ int main() { va6.swizzle<0, 1>(), c1.swizzle<0, 1>()); TEST(sycl::select, float, EXPECTED(float, 1.0, 2.0), 2, va5.swizzle<0, 1>(), va6.swizzle<0, 1>(), c1.swizzle<0, 1>()); +#endif // __INTEL_PREVIEW_BREAKING_CHANGES return 0; } diff --git a/sycl/test-e2e/DeviceLib/built-ins/nan.cpp b/sycl/test-e2e/DeviceLib/built-ins/nan.cpp index fddb26615b1cc..8bf41b8d9434d 100644 --- a/sycl/test-e2e/DeviceLib/built-ins/nan.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/nan.cpp @@ -1,6 +1,9 @@ // RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out // RUN: %{run} %t.out +// RUN: %if preview-breaking-changes-supported %{ %{build} -fsycl-device-code-split=per_kernel -o %t2.out %} +// RUN: %if preview-breaking-changes-supported %{ %{run} %t2.out %} + #include #include diff --git a/sycl/test-e2e/DeviceLib/built-ins/scalar_integer.cpp b/sycl/test-e2e/DeviceLib/built-ins/scalar_integer.cpp index 59b5ace6ba470..3c536970aa0b2 100644 --- a/sycl/test-e2e/DeviceLib/built-ins/scalar_integer.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/scalar_integer.cpp @@ -1,6 +1,9 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out +// RUN: %if preview-breaking-changes-supported %{ %{build} -o %t2.out %} +// RUN: %if preview-breaking-changes-supported %{ %{run} %t2.out %} + #include #include diff --git a/sycl/test-e2e/USM/math.cpp b/sycl/test-e2e/USM/math.cpp index 2d95adb12ee2f..6c6fef86fcabe 100644 --- a/sycl/test-e2e/USM/math.cpp +++ b/sycl/test-e2e/USM/math.cpp @@ -2,6 +2,9 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out +// RUN: %if preview-breaking-changes-supported %{ %{build} -o %t2.out %} +// RUN: %if preview-breaking-changes-supported %{ %{run} %t2.out %} + #include #include diff --git a/sycl/test/basic_tests/relational_builtins.cpp b/sycl/test/basic_tests/relational_builtins.cpp index ab6daf48ff510..e10fcccb14f47 100644 --- a/sycl/test/basic_tests/relational_builtins.cpp +++ b/sycl/test/basic_tests/relational_builtins.cpp @@ -1,4 +1,5 @@ // RUN: %clangxx -fsycl %s -o %t.out +// RUN: %if preview-breaking-changes-supported %{ %clangxx -fsycl %s -o %t.out %} // NOTE: Compile the test fully to ensure the library exports the right host // symbols. diff --git a/sycl/test/regression/abs_diff_host.cpp b/sycl/test/regression/abs_diff_host.cpp index a242083f83e52..00b214273cabc 100644 --- a/sycl/test/regression/abs_diff_host.cpp +++ b/sycl/test/regression/abs_diff_host.cpp @@ -1,5 +1,7 @@ // RUN: %clangxx -fsycl %s -o %t.out // RUN: %t.out +// RUN: %if preview-breaking-changes-supported %{ %clangxx -fsycl %s -o %t2.out %} +// RUN: %if preview-breaking-changes-supported %{ %t2.out %} // Test checks that sycl::abs_diff correctly handles signed operations that // might overflow. diff --git a/sycl/test/warnings/sycl_2020_deprecations.cpp b/sycl/test/warnings/sycl_2020_deprecations.cpp index eefee088edd05..98fd788441a95 100644 --- a/sycl/test/warnings/sycl_2020_deprecations.cpp +++ b/sycl/test/warnings/sycl_2020_deprecations.cpp @@ -139,7 +139,7 @@ int main() { sycl::byte B; (void)B; - // expected-warning@+1{{'abs' is deprecated: abs for floating point types is non-standard and has been deprecated. Please use fabs instead.}} + // expected-warning@+1{{abs for floating point types is non-standard and has been deprecated. Please use fabs instead.}} sycl::abs(0.0f); // expected-warning@+1{{'image_support' is deprecated: deprecated in SYCL 2020, use device::has(aspect::ext_intel_legacy_image) to query for SYCL 1.2.1 image support}}