Skip to content

[ESIMD] Overloading sycl sin,cos,exp,log functions for ESIMD arguments #3717

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 16, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions sycl/include/CL/sycl/builtins_esimd.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
//==----------- builtins_esimd.hpp - SYCL ESIMD built-in functions ---------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once

#include <CL/sycl/detail/boolean.hpp>
#include <CL/sycl/detail/builtins.hpp>
#include <CL/sycl/detail/common.hpp>
#include <CL/sycl/detail/generic_type_traits.hpp>
#include <CL/sycl/types.hpp>
#include <sycl/ext/intel/experimental/esimd/detail/math_intrin.hpp>

// TODO Decide whether to mark functions with this attribute.
#define __NOEXC /*noexcept*/
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think for device it should be noexcept, as for all device functions, for host - same as usual SYCL host.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I copied it from builtins.hpp.


__SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {

// cos
template <int SZ>
ESIMD_NODEBUG ESIMD_INLINE __ESIMD_NS::simd<float, SZ>
cos(__ESIMD_NS::simd<float, SZ> x) __NOEXC {
#ifdef __SYCL_DEVICE_ONLY__
return __ESIMD_NS::detail::ocl_cos<SZ>(x.data());
#else
return __esimd_cos<SZ>(x.data());
#endif // __SYCL_DEVICE_ONLY__
}

// sin
template <int SZ>
ESIMD_NODEBUG ESIMD_INLINE __ESIMD_NS::simd<float, SZ>
sin(__ESIMD_NS::simd<float, SZ> x) __NOEXC {
#ifdef __SYCL_DEVICE_ONLY__
return __ESIMD_NS::detail::ocl_sin<SZ>(x.data());
#else
return __esimd_sin<SZ>(x.data());
#endif // __SYCL_DEVICE_ONLY__
}

// exp
template <int SZ>
ESIMD_NODEBUG ESIMD_INLINE __ESIMD_NS::simd<float, SZ>
exp(__ESIMD_NS::simd<float, SZ> x) __NOEXC {
#ifdef __SYCL_DEVICE_ONLY__
return __ESIMD_NS::detail::ocl_exp<SZ>(x.data());
#else
return __esimd_exp<SZ>(x.data());
#endif // __SYCL_DEVICE_ONLY__
}

// log
template <int SZ>
ESIMD_NODEBUG ESIMD_INLINE __ESIMD_NS::simd<float, SZ>
log(__ESIMD_NS::simd<float, SZ> x) __NOEXC {
#ifdef __SYCL_DEVICE_ONLY__
return __ESIMD_NS::detail::ocl_log<SZ>(x.data());
#else
return __esimd_log<SZ>(x.data());
#endif // __SYCL_DEVICE_ONLY__
}

} // namespace sycl
} // __SYCL_INLINE_NAMESPACE(cl)

#undef __NOEXC
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

#pragma once

#include <CL/sycl/builtins.hpp>
#include <sycl/ext/intel/experimental/esimd/common.hpp>
#include <sycl/ext/intel/experimental/esimd/detail/host_util.hpp>
#include <sycl/ext/intel/experimental/esimd/detail/types.hpp>
Expand Down Expand Up @@ -316,7 +317,58 @@ SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t<Ty, N>
__esimd_dp4(__SEIEED::vector_type_t<Ty, N> v1,
__SEIEED::vector_type_t<Ty, N> v2);

#ifndef __SYCL_DEVICE_ONLY__
#ifdef __SYCL_DEVICE_ONLY__

// lane-id for reusing scalar math functions.
// Depending upon the SIMT mode(8/16/32), the return value is
// in the range of 0-7, 0-15, or 0-31.
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION int __esimd_lane_id();

// Wrapper for designating a scalar region of code that will be
// vectorized by the backend compiler.
#define __ESIMD_SIMT_BEGIN(N, lane) \
[&]() SYCL_ESIMD_FUNCTION ESIMD_NOINLINE \
[[intel::sycl_esimd_vectorize(N)]] { \
int lane = __esimd_lane_id();
#define __ESIMD_SIMT_END \
} \
();

#define ESIMD_MATH_INTRINSIC_IMPL(type, func) \
template <int SZ> \
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t<type, SZ> \
ocl_##func(__SEIEED::vector_type_t<type, SZ> src0) { \
__SEIEED::vector_type_t<type, SZ> retv; \
__ESIMD_SIMT_BEGIN(SZ, lane) \
retv[lane] = sycl::func(src0[lane]); \
__ESIMD_SIMT_END \
return retv; \
}

__SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {
namespace ext {
namespace intel {
namespace experimental {
namespace esimd {
namespace detail {
ESIMD_MATH_INTRINSIC_IMPL(float, sin)
ESIMD_MATH_INTRINSIC_IMPL(float, cos)
ESIMD_MATH_INTRINSIC_IMPL(float, exp)
ESIMD_MATH_INTRINSIC_IMPL(float, log)
} // namespace detail
} // namespace esimd
} // namespace experimental
} // namespace intel
} // namespace ext
} // namespace sycl
} // __SYCL_INLINE_NAMESPACE(cl)

#undef __ESIMD_SIMT_BEGIN
#undef __ESIMD_SIMT_END
#undef ESIMD_MATH_INTRINSIC_IMPL

#else // __SYCL_DEVICE_ONLY__

template <typename T>
inline T extract(const uint32_t &width, const uint32_t &offset, uint32_t src,
Expand Down Expand Up @@ -1277,6 +1329,6 @@ __esimd_reduced_smin(__SEIEED::vector_type_t<Ty, N> src1,

#undef __SEIEEED

#endif // #ifndef __SYCL_DEVICE_ONLY__
#endif // #ifdef __SYCL_DEVICE_ONLY__

#undef __SEIEED
34 changes: 34 additions & 0 deletions sycl/test/esimd/lane_id.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// RUN: %clangxx -fsycl -c -fsycl-device-only -Xclang -emit-llvm %s -o - | FileCheck %s

// This test checks the codegen for the basic usage of __ESIMD_SIMT_BEGIN -
// __ESIMD_SIMT_END construct.

#include <CL/sycl.hpp>
#include <CL/sycl/INTEL/esimd.hpp>

using namespace cl::sycl;
using namespace sycl::ext::intel::experimental::esimd;

// Wrapper for designating a scalar region of code that will be
// vectorized by the backend compiler.
#define SIMT_BEGIN(N, lane) \
[&]() SYCL_ESIMD_FUNCTION ESIMD_NOINLINE \
[[intel::sycl_esimd_vectorize(N)]] { \
int lane = __esimd_lane_id();
#define SIMT_END \
} \
();

// CHECK-LABEL: define dso_local spir_func void @_Z3fooi
//CHECK: call spir_func void @_ZZ3fooiENKUlvE_clEv(
SYCL_ESIMD_FUNCTION SYCL_EXTERNAL simd<int, 16> foo(int x) {
simd<int, 16> v = 0;
SIMT_BEGIN(16, lane)
//CHECK: define internal spir_func void @_ZZ3fooiENKUlvE_clEv({{.*}}) {{.*}} #[[ATTR:[0-9]+]]
//CHECK: %{{[0-9a-zA-Z_.]+}} = tail call spir_func i32 @_Z15__esimd_lane_idv()
v.select<1, 0>(lane) = x++;
SIMT_END
return v;
}

//CHECK: attributes #[[ATTR]] = { {{.*}} "CMGenxSIMT"="16" {{.*}}}
41 changes: 41 additions & 0 deletions sycl/test/esimd/math_impl.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// RUN: %clangxx -fsycl -c -fsycl-device-only -Xclang -emit-llvm %s -o - | FileCheck %s

// This test checks the codegen for the following ESIMD APIs:
// sin, cos, exp, log.

#include <CL/sycl.hpp>
#include <CL/sycl/INTEL/esimd.hpp>
#include <CL/sycl/builtins_esimd.hpp>

using namespace cl::sycl;
using namespace sycl::ext::intel::experimental::esimd;

// Math sin,cos,log,exp functions are translated into scalar __spirv_ocl_ calls
SYCL_ESIMD_FUNCTION SYCL_EXTERNAL simd<float, 16> sycl_math(simd<float, 16> x) {
simd<float, 16> v = 0;
//CHECK: call spir_func float @_Z15__spirv_ocl_cosf
v = sycl::cos(x);
//CHECK: call spir_func float @_Z15__spirv_ocl_sinf
v = sycl::sin(v);
//CHECK: call spir_func float @_Z15__spirv_ocl_logf
v = sycl::log(v);
//CHECK: call spir_func float @_Z15__spirv_ocl_expf
v = sycl::exp(v);
return v;
}

// Math sin,cos,log,exp functions from esimd namespace are translated
// into vector __esimd_ calls, which later translate into GenX intrinsics.
SYCL_ESIMD_FUNCTION SYCL_EXTERNAL simd<float, 16>
esimd_math(simd<float, 16> x) {
simd<float, 16> v = 0;
//CHECK: call spir_func <16 x float> @_Z11__esimd_cos
v = esimd_cos(x);
//CHECK: call spir_func <16 x float> @_Z11__esimd_sin
v = esimd_sin(v);
//CHECK: call spir_func <16 x float> @_Z11__esimd_log
v = esimd_log(v);
//CHECK: call spir_func <16 x float> @_Z11__esimd_exp
v = esimd_exp(v);
return v;
}