Skip to content

Commit a90d0fd

Browse files
authored
[SYCL] Add sycl_ext_intel_math_inv (#8138)
This PR adds sycl::ext::intel::math::inv for fp32, fp64, half, and half2 to the SYCL Intel math header. The actual implementation is provided by the SYCL device library through __imf_inv, __imf_invf16, and __imf_invf.
1 parent 0b1fd8d commit a90d0fd

File tree

1 file changed

+21
-0
lines changed

1 file changed

+21
-0
lines changed

sycl/include/sycl/ext/intel/math.hpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ double __imf_floor(double);
3636
_iml_half_internal __imf_floorf16(_iml_half_internal);
3737
float __imf_rintf(float);
3838
double __imf_rint(double);
39+
// Half-precision "inv" entry point; takes and returns the raw
// _iml_half_internal representation. Only declared here -- the definition is
// expected to come from the SYCL device library.
// NOTE(review): "inv" presumably means the reciprocal 1/x -- confirm.
_iml_half_internal __imf_invf16(_iml_half_internal);
40+
// Single-precision "inv" entry point. Only declared here -- the definition is
// expected to come from the SYCL device library.
// NOTE(review): "inv" presumably means the reciprocal 1/x -- confirm.
float __imf_invf(float);
41+
// Double-precision "inv" entry point. Only declared here -- the definition is
// expected to come from the SYCL device library.
// NOTE(review): "inv" presumably means the reciprocal 1/x -- confirm.
double __imf_inv(double);
3942
_iml_half_internal __imf_rintf16(_iml_half_internal);
4043
float __imf_sqrtf(float);
4144
double __imf_sqrt(double);
@@ -118,6 +121,24 @@ sycl::half2 floor(sycl::half2 x) {
118121
return sycl::half2{floor(x.s0()), floor(x.s1())};
119122
}
120123

124+
// inv for float: hands the argument to the __imf_invf device-library entry
// point and returns its result unchanged. The enable_if constraint removes
// this overload from consideration unless Tp is exactly float, so no implicit
// conversions (e.g. from double or int) select it.
template <typename Tp>
std::enable_if_t<std::is_same_v<Tp, float>, float> inv(Tp x) {
  const float result = __imf_invf(x);
  return result;
}
128+
129+
// inv for double: hands the argument to the __imf_inv device-library entry
// point and returns its result unchanged. The enable_if constraint removes
// this overload from consideration unless Tp is exactly double, so no
// implicit conversions (e.g. from float or int) select it.
template <typename Tp>
std::enable_if_t<std::is_same_v<Tp, double>, double> inv(Tp x) {
  const double result = __imf_inv(x);
  return result;
}
133+
134+
template <typename Tp>
135+
std::enable_if_t<std::is_same_v<Tp, sycl::half>, sycl::half> inv(Tp x) {
136+
_iml_half_internal xi = __builtin_bit_cast(_iml_half_internal, x);
137+
return __builtin_bit_cast(sycl::half, __imf_invf16(xi));
138+
}
139+
140+
// inv for sycl::half2: applies the scalar inv overload to each of the two
// components (s0 and s1) and packs the results into a new half2.
//
// Declared inline because this is a non-template function defined in a
// header: without inline, every translation unit that includes the header
// emits its own external definition, which violates the one-definition rule
// and fails at link time with duplicate symbols.
inline sycl::half2 inv(sycl::half2 x) {
  return sycl::half2{inv(x.s0()), inv(x.s1())};
}
141+
121142
template <typename Tp>
122143
std::enable_if_t<std::is_same_v<Tp, float>, float> rint(Tp x) {
123144
return __imf_rintf(x);

0 commit comments

Comments
 (0)