Skip to content

Commit 47466ee

Browse files
authored
Merge pull request #1221 from IntelPython/implemented_floor_divide
Implemented floor_divide() function.
2 parents 69d420e + 1c18270 commit 47466ee

File tree

6 files changed

+577
-4
lines changed

6 files changed

+577
-4
lines changed

dpctl/tensor/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@
100100
equal,
101101
exp,
102102
expm1,
103+
floor_divide,
103104
imag,
104105
isfinite,
105106
isinf,
@@ -213,4 +214,5 @@
213214
"equal",
214215
"not_equal",
215216
"sum",
217+
"floor_divide",
216218
]

dpctl/tensor/_elementwise_funcs.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,31 @@
270270
# FIXME: implement U15
271271

272272
# B10: ==== FLOOR_DIVIDE (x1, x2)
273-
# FIXME: implement B10
273+
_floor_divide_docstring_ = """
274+
floor_divide(x1, x2, out=None, order='K')
275+
276+
Calculates the ratio for each element `x1_i` of the input array `x1` with
277+
the respective element `x2_i` of the input array `x2` to the greatest
278+
integer-value number that is not greater than the division result.
279+
280+
Args:
281+
x1 (usm_ndarray):
282+
First input array, expected to have numeric data type.
283+
x2 (usm_ndarray):
284+
Second input array, also expected to have numeric data type.
285+
Returns:
286+
usm_narray:
287+
an array containing the result of element-wise floor division.
288+
The data type of the returned array is determined by the Type
289+
Promotion Rules.
290+
"""
291+
292+
floor_divide = BinaryElementwiseFunc(
293+
"floor_divide",
294+
ti._floor_divide_result_type,
295+
ti._floor_divide,
296+
_floor_divide_docstring_,
297+
)
274298

275299
# B11: ==== GREATER (x1, x2)
276300
# FIXME: implement B11
Lines changed: 284 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,284 @@
1+
//=== floor_divide.hpp - Binary function FLOOR_DIVIDE ------ *-C++-*--/===//
2+
//
3+
// Data Parallel Control (dpctl)
4+
//
5+
// Copyright 2020-2023 Intel Corporation
6+
//
7+
// Licensed under the Apache License, Version 2.0 (the "License");
8+
// you may not use this file except in compliance with the License.
9+
// You may obtain a copy of the License at
10+
//
11+
// http://www.apache.org/licenses/LICENSE-2.0
12+
//
13+
// Unless required by applicable law or agreed to in writing, software
14+
// distributed under the License is distributed on an "AS IS" BASIS,
15+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
// See the License for the specific language governing permissions and
17+
// limitations under the License.
18+
//
19+
//===---------------------------------------------------------------------===//
20+
///
21+
/// \file
22+
/// This file defines kernels for elementwise evaluation of FLOOR_DIVIDE(x1, x2)
23+
/// function.
24+
//===---------------------------------------------------------------------===//
25+
26+
#pragma once
27+
#include <CL/sycl.hpp>
28+
#include <cstddef>
29+
#include <cstdint>
30+
#include <type_traits>
31+
32+
#include "utils/offset_utils.hpp"
33+
#include "utils/type_dispatch.hpp"
34+
#include "utils/type_utils.hpp"
35+
36+
#include "kernels/elementwise_functions/common.hpp"
37+
#include <pybind11/pybind11.h>
38+
39+
namespace dpctl
40+
{
41+
namespace tensor
42+
{
43+
namespace kernels
44+
{
45+
namespace floor_divide
46+
{
47+
48+
namespace py = pybind11;
49+
namespace td_ns = dpctl::tensor::type_dispatch;
50+
namespace tu_ns = dpctl::tensor::type_utils;
51+
52+
template <typename argT1, typename argT2, typename resT>
53+
struct FloorDivideFunctor
54+
{
55+
56+
using supports_sg_loadstore =
57+
std::negation<std::disjunction<tu_ns::is_complex<argT1>,
58+
tu_ns::is_complex<argT2>>>; // TRUE
59+
using supports_vec = std::negation<
60+
std::disjunction<tu_ns::is_complex<argT1>, tu_ns::is_complex<argT2>>>;
61+
62+
resT operator()(const argT1 &in1, const argT2 &in2)
63+
{
64+
auto tmp = in1 / in2;
65+
if constexpr (std::is_integral_v<decltype(tmp)>) {
66+
return tmp;
67+
}
68+
else {
69+
return sycl::floor(tmp);
70+
}
71+
}
72+
73+
template <int vec_sz>
74+
sycl::vec<resT, vec_sz> operator()(const sycl::vec<argT1, vec_sz> &in1,
75+
const sycl::vec<argT2, vec_sz> &in2)
76+
{
77+
auto tmp = in1 / in2;
78+
if constexpr (std::is_same_v<resT,
79+
typename decltype(tmp)::element_type> &&
80+
std::is_integral_v<resT>)
81+
{
82+
return tmp;
83+
}
84+
else if constexpr (std::is_integral_v<typename decltype(
85+
tmp)::element_type>) {
86+
using dpctl::tensor::type_utils::vec_cast;
87+
return vec_cast<resT, typename decltype(tmp)::element_type, vec_sz>(
88+
tmp);
89+
}
90+
else {
91+
sycl::vec<resT, vec_sz> res = sycl::floor(tmp);
92+
if constexpr (std::is_same_v<resT,
93+
typename decltype(res)::element_type>)
94+
{
95+
return res;
96+
}
97+
else {
98+
using dpctl::tensor::type_utils::vec_cast;
99+
return vec_cast<resT, typename decltype(res)::element_type,
100+
vec_sz>(res);
101+
}
102+
}
103+
}
104+
};
105+
106+
template <typename argT1,
107+
typename argT2,
108+
typename resT,
109+
unsigned int vec_sz = 4,
110+
unsigned int n_vecs = 2>
111+
using FloorDivideContigFunctor = elementwise_common::BinaryContigFunctor<
112+
argT1,
113+
argT2,
114+
resT,
115+
FloorDivideFunctor<argT1, argT2, resT>,
116+
vec_sz,
117+
n_vecs>;
118+
119+
template <typename argT1, typename argT2, typename resT, typename IndexerT>
120+
using FloorDivideStridedFunctor = elementwise_common::BinaryStridedFunctor<
121+
argT1,
122+
argT2,
123+
resT,
124+
IndexerT,
125+
FloorDivideFunctor<argT1, argT2, resT>>;
126+
127+
template <typename T1, typename T2> struct FloorDivideOutputType
128+
{
129+
using value_type = typename std::disjunction< // disjunction is C++17
130+
// feature, supported by DPC++
131+
td_ns::BinaryTypeMapResultEntry<T1, bool, T2, bool, std::int8_t>,
132+
td_ns::BinaryTypeMapResultEntry<T1,
133+
std::uint8_t,
134+
T2,
135+
std::uint8_t,
136+
std::uint8_t>,
137+
td_ns::BinaryTypeMapResultEntry<T1,
138+
std::int8_t,
139+
T2,
140+
std::int8_t,
141+
std::int8_t>,
142+
td_ns::BinaryTypeMapResultEntry<T1,
143+
std::uint16_t,
144+
T2,
145+
std::uint16_t,
146+
std::uint16_t>,
147+
td_ns::BinaryTypeMapResultEntry<T1,
148+
std::int16_t,
149+
T2,
150+
std::int16_t,
151+
std::int16_t>,
152+
td_ns::BinaryTypeMapResultEntry<T1,
153+
std::uint32_t,
154+
T2,
155+
std::uint32_t,
156+
std::uint32_t>,
157+
td_ns::BinaryTypeMapResultEntry<T1,
158+
std::int32_t,
159+
T2,
160+
std::int32_t,
161+
std::int32_t>,
162+
td_ns::BinaryTypeMapResultEntry<T1,
163+
std::uint64_t,
164+
T2,
165+
std::uint64_t,
166+
std::uint64_t>,
167+
td_ns::BinaryTypeMapResultEntry<T1,
168+
std::int64_t,
169+
T2,
170+
std::int64_t,
171+
std::int64_t>,
172+
td_ns::BinaryTypeMapResultEntry<T1,
173+
sycl::half,
174+
T2,
175+
sycl::half,
176+
sycl::half>,
177+
td_ns::BinaryTypeMapResultEntry<T1, float, T2, float, float>,
178+
td_ns::BinaryTypeMapResultEntry<T1, double, T2, double, double>,
179+
td_ns::DefaultResultEntry<void>>::result_type;
180+
};
181+
182+
template <typename argT1,
183+
typename argT2,
184+
typename resT,
185+
unsigned int vec_sz,
186+
unsigned int n_vecs>
187+
class floor_divide_contig_kernel;
188+
189+
template <typename argTy1, typename argTy2>
190+
sycl::event
191+
floor_divide_contig_impl(sycl::queue exec_q,
192+
size_t nelems,
193+
const char *arg1_p,
194+
py::ssize_t arg1_offset,
195+
const char *arg2_p,
196+
py::ssize_t arg2_offset,
197+
char *res_p,
198+
py::ssize_t res_offset,
199+
const std::vector<sycl::event> &depends = {})
200+
{
201+
return elementwise_common::binary_contig_impl<
202+
argTy1, argTy2, FloorDivideOutputType, FloorDivideContigFunctor,
203+
floor_divide_contig_kernel>(exec_q, nelems, arg1_p, arg1_offset, arg2_p,
204+
arg2_offset, res_p, res_offset, depends);
205+
}
206+
207+
template <typename fnT, typename T1, typename T2>
208+
struct FloorDivideContigFactory
209+
{
210+
fnT get()
211+
{
212+
if constexpr (std::is_same_v<
213+
typename FloorDivideOutputType<T1, T2>::value_type,
214+
void>)
215+
{
216+
fnT fn = nullptr;
217+
return fn;
218+
}
219+
else {
220+
fnT fn = floor_divide_contig_impl<T1, T2>;
221+
return fn;
222+
}
223+
}
224+
};
225+
226+
template <typename fnT, typename T1, typename T2>
227+
struct FloorDivideTypeMapFactory
228+
{
229+
/*! @brief get typeid for output type of floor_divide(T1 x, T2 y) */
230+
std::enable_if_t<std::is_same<fnT, int>::value, int> get()
231+
{
232+
using rT = typename FloorDivideOutputType<T1, T2>::value_type;
233+
return td_ns::GetTypeid<rT>{}.get();
234+
}
235+
};
236+
237+
template <typename T1, typename T2, typename resT, typename IndexerT>
238+
class floor_divide_strided_strided_kernel;
239+
240+
template <typename argTy1, typename argTy2>
241+
sycl::event
242+
floor_divide_strided_impl(sycl::queue exec_q,
243+
size_t nelems,
244+
int nd,
245+
const py::ssize_t *shape_and_strides,
246+
const char *arg1_p,
247+
py::ssize_t arg1_offset,
248+
const char *arg2_p,
249+
py::ssize_t arg2_offset,
250+
char *res_p,
251+
py::ssize_t res_offset,
252+
const std::vector<sycl::event> &depends,
253+
const std::vector<sycl::event> &additional_depends)
254+
{
255+
return elementwise_common::binary_strided_impl<
256+
argTy1, argTy2, FloorDivideOutputType, FloorDivideStridedFunctor,
257+
floor_divide_strided_strided_kernel>(
258+
exec_q, nelems, nd, shape_and_strides, arg1_p, arg1_offset, arg2_p,
259+
arg2_offset, res_p, res_offset, depends, additional_depends);
260+
}
261+
262+
template <typename fnT, typename T1, typename T2>
263+
struct FloorDivideStridedFactory
264+
{
265+
fnT get()
266+
{
267+
if constexpr (std::is_same_v<
268+
typename FloorDivideOutputType<T1, T2>::value_type,
269+
void>)
270+
{
271+
fnT fn = nullptr;
272+
return fn;
273+
}
274+
else {
275+
fnT fn = floor_divide_strided_impl<T1, T2>;
276+
return fn;
277+
}
278+
}
279+
};
280+
281+
} // namespace floor_divide
282+
} // namespace kernels
283+
} // namespace tensor
284+
} // namespace dpctl

0 commit comments

Comments
 (0)