Skip to content

Commit 9b94ea0

Browse files
Merge pull request #1097 from IntelPython/boolean-indexing-extract-place-nonzero
Boolean indexing: extract, place, nonzero
2 parents 03c4822 + cab0035 commit 9b94ea0

12 files changed

+2949
-58
lines changed

dpctl/tensor/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ pybind11_add_module(${python_module_name} MODULE
3232
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/copy_for_reshape.cpp
3333
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/linear_sequences.cpp
3434
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/integer_advanced_indexing.cpp
35+
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/boolean_advanced_indexing.cpp
3536
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/eye_ctor.cpp
3637
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/full_ctor.cpp
3738
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/triul_ctor.cpp

dpctl/tensor/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@
5858
)
5959
from dpctl.tensor._device import Device
6060
from dpctl.tensor._dlpack import from_dlpack
61-
from dpctl.tensor._indexing_functions import put, take
61+
from dpctl.tensor._indexing_functions import extract, nonzero, place, put, take
6262
from dpctl.tensor._manipulation_functions import (
6363
broadcast_arrays,
6464
broadcast_to,
@@ -115,6 +115,9 @@
115115
"squeeze",
116116
"take",
117117
"put",
118+
"extract",
119+
"place",
120+
"nonzero",
118121
"from_numpy",
119122
"to_numpy",
120123
"asnumpy",

dpctl/tensor/_copy_utils.py

Lines changed: 100 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -389,45 +389,75 @@ def astype(usm_ary, newdtype, order="K", casting="unsafe", copy=True):
389389
return R
390390

391391

392-
def _mock_extract(ary, ary_mask, p):
393-
exec_q = dpctl.utils.get_execution_queue(
394-
(
395-
ary.sycl_queue,
396-
ary_mask.sycl_queue,
392+
def _extract_impl(ary, ary_mask, axis=0):
    """Extract elements of ``ary`` selected by ``ary_mask``.

    The mask is applied to the ``ary_mask.ndim`` consecutive dimensions of
    ``ary`` starting at dimension ``axis``.

    Args:
        ary: usm_ndarray
            Array to extract elements from.
        ary_mask: usm_ndarray
            Mask array whose positions are computed with
            ``ti.mask_positions``.
        axis: int
            First dimension of ``ary`` that the mask applies to.
            Converted with ``operator.index`` and normalized against
            ``ary.ndim``. Default: ``0``.

    Returns:
        usm_ndarray
            Newly allocated array of shape
            ``ary.shape[:axis] + (mask_count,) + ary.shape[axis + mask_nd:]``
            with the dtype and USM type of ``ary``, where ``mask_count`` is
            the value returned by ``ti.mask_positions``.

    Raises:
        TypeError: if ``ary`` or ``ary_mask`` is not a ``usm_ndarray``.
        dpctl.utils.ExecutionPlacementError: if no common execution queue
            can be determined for the inputs.
        ValueError: if the mask dimensions starting at ``axis`` do not fit
            into the dimensions of ``ary``.
    """
    if not isinstance(ary, dpt.usm_ndarray):
        raise TypeError(
            f"Expecting type dpctl.tensor.usm_ndarray, got {type(ary)}"
        )
    if not isinstance(ary_mask, dpt.usm_ndarray):
        raise TypeError(
            f"Expecting type dpctl.tensor.usm_ndarray, got {type(ary_mask)}"
        )
    exec_q = dpctl.utils.get_execution_queue(
        (ary.sycl_queue, ary_mask.sycl_queue)
    )
    if exec_q is None:
        raise dpctl.utils.ExecutionPlacementError(
            "arrays have different associated queues. "
            "Use `Y.to_device(X.device)` to migrate."
        )
    ary_nd = ary.ndim
    pp = normalize_axis_index(operator.index(axis), ary_nd)
    mask_nd = ary_mask.ndim
    if pp < 0 or pp + mask_nd > ary_nd:
        # The message names the actual keyword (`axis`), not a stale name.
        raise ValueError(
            "Parameter axis is inconsistent with input array dimensions"
        )
    mask_nelems = ary_mask.size
    cumsum = dpt.empty(mask_nelems, dtype=dpt.int64, device=ary_mask.device)
    # From here on, work is submitted to the queue of the scratch array.
    exec_q = cumsum.sycl_queue
    mask_count = ti.mask_positions(ary_mask, cumsum, sycl_queue=exec_q)
    dst_shape = ary.shape[:pp] + (mask_count,) + ary.shape[pp + mask_nd :]
    dst = dpt.empty(
        dst_shape, dtype=ary.dtype, usm_type=ary.usm_type, device=ary.device
    )
    hev, _ = ti._extract(
        src=ary,
        cumsum=cumsum,
        axis_start=pp,
        axis_end=pp + mask_nd,
        dst=dst,
        sycl_queue=exec_q,
    )
    # Block until the host task event completes before handing out `dst`.
    hev.wait()
    return dst
421436

422437

423-
def _mock_nonzero(ary):
438+
def _nonzero_impl(ary):
    """Compute per-dimension index arrays from ``ary`` used as a mask.

    Args:
        ary: usm_ndarray
            Input array; its positions are computed with
            ``ti.mask_positions``.

    Returns:
        tuple
            ``ary.ndim`` one-dimensional views, one per row of a
            ``(ary.ndim, count)`` int64 index array filled by
            ``ti._nonzero``, where ``count`` is the value returned by
            ``ti.mask_positions``.

    Raises:
        TypeError: if ``ary`` is not a ``usm_ndarray``.
    """
    if not isinstance(ary, dpt.usm_ndarray):
        raise TypeError(
            f"Expecting type dpctl.tensor.usm_ndarray, got {type(ary)}"
        )
    queue = ary.sycl_queue
    result_usm_type = ary.usm_type
    # Scratch buffer with one int64 slot per element of the input.
    positions = dpt.empty(
        ary.size, dtype=dpt.int64, sycl_queue=queue, order="C"
    )
    nz_count = ti.mask_positions(ary, positions, sycl_queue=queue)
    # One row of indices per dimension of the input.
    indexes = dpt.empty(
        (ary.ndim, nz_count),
        dtype=positions.dtype,
        usm_type=result_usm_type,
        sycl_queue=queue,
        order="C",
    )
    host_ev, _ = ti._nonzero(positions, indexes, ary.shape, queue)
    per_axis = tuple(indexes[dim, :] for dim in range(ary.ndim))
    # Wait for the host task event before returning views of `indexes`.
    host_ev.wait()
    return per_axis
431461

432462

433463
def _take_multi_index(ary, inds, p):
@@ -473,34 +503,57 @@ def _take_multi_index(ary, inds, p):
473503
return res
474504

475505

476-
def _mock_place(ary, ary_mask, p, vals):
506+
def _place_impl(ary, ary_mask, vals, axis=0):
    """Place elements of ``vals`` into ``ary`` at positions selected by
    ``ary_mask``.

    The mask is applied to the ``ary_mask.ndim`` consecutive dimensions of
    ``ary`` starting at dimension ``axis``. ``ary`` is modified in place.

    Args:
        ary: usm_ndarray
            Destination array.
        ary_mask: usm_ndarray
            Mask array whose positions are computed with
            ``ti.mask_positions``.
        vals: usm_ndarray
            Values to place. Cast to ``ary.dtype`` if the dtypes differ and
            broadcast to
            ``ary.shape[:axis] + (mask_count,) + ary.shape[axis + mask_nd:]``,
            where ``mask_count`` is the value returned by
            ``ti.mask_positions``.
        axis: int
            First dimension of ``ary`` that the mask applies to.
            Converted with ``operator.index`` and normalized against
            ``ary.ndim``. Default: ``0``.

    Returns:
        None

    Raises:
        TypeError: if ``ary``, ``ary_mask`` or ``vals`` is not a
            ``usm_ndarray``.
        dpctl.utils.ExecutionPlacementError: if no common execution queue
            can be determined for the inputs.
        ValueError: if the mask dimensions starting at ``axis`` do not fit
            into the dimensions of ``ary``.
    """
    if not isinstance(ary, dpt.usm_ndarray):
        raise TypeError(
            f"Expecting type dpctl.tensor.usm_ndarray, got {type(ary)}"
        )
    if not isinstance(ary_mask, dpt.usm_ndarray):
        raise TypeError(
            f"Expecting type dpctl.tensor.usm_ndarray, got {type(ary_mask)}"
        )
    if not isinstance(vals, dpt.usm_ndarray):
        # Report the type of the offending argument (`vals`), not the mask.
        raise TypeError(
            f"Expecting type dpctl.tensor.usm_ndarray, got {type(vals)}"
        )
    exec_q = dpctl.utils.get_execution_queue(
        (ary.sycl_queue, ary_mask.sycl_queue, vals.sycl_queue)
    )
    if exec_q is None:
        raise dpctl.utils.ExecutionPlacementError(
            "arrays have different associated queues. "
            "Use `Y.to_device(X.device)` to migrate."
        )
    ary_nd = ary.ndim
    pp = normalize_axis_index(operator.index(axis), ary_nd)
    mask_nd = ary_mask.ndim
    if pp < 0 or pp + mask_nd > ary_nd:
        # The message names the actual keyword (`axis`), not a stale name.
        raise ValueError(
            "Parameter axis is inconsistent with input array dimensions"
        )
    mask_nelems = ary_mask.size
    cumsum = dpt.empty(mask_nelems, dtype=dpt.int64, device=ary_mask.device)
    # From here on, work is submitted to the queue of the scratch array.
    exec_q = cumsum.sycl_queue
    mask_count = ti.mask_positions(ary_mask, cumsum, sycl_queue=exec_q)
    expected_vals_shape = (
        ary.shape[:pp] + (mask_count,) + ary.shape[pp + mask_nd :]
    )
    if vals.dtype == ary.dtype:
        rhs = vals
    else:
        rhs = dpt.astype(vals, ary.dtype)
    rhs = dpt.broadcast_to(rhs, expected_vals_shape)
    hev, _ = ti._place(
        dst=ary,
        cumsum=cumsum,
        axis_start=pp,
        axis_end=pp + mask_nd,
        rhs=rhs,
        sycl_queue=exec_q,
    )
    # Block until the host task event completes before returning.
    hev.wait()
    return
505558

506559

dpctl/tensor/_indexing_functions.py

Lines changed: 136 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@
2121

2222
import dpctl
2323
import dpctl.tensor as dpt
24-
from dpctl.tensor._tensor_impl import _put, _take
24+
import dpctl.tensor._tensor_impl as ti
25+
26+
from ._copy_utils import _extract_impl, _nonzero_impl
2527

2628

2729
def take(x, indices, /, *, axis=None, mode="clip"):
@@ -93,7 +95,7 @@ def take(x, indices, /, *, axis=None, mode="clip"):
9395
res_shape, dtype=x.dtype, usm_type=res_usm_type, sycl_queue=exec_q
9496
)
9597

96-
hev, _ = _take(x, indices, res, axis, mode, sycl_queue=exec_q)
98+
hev, _ = ti._take(x, indices, res, axis, mode, sycl_queue=exec_q)
9799
hev.wait()
98100

99101
return res
@@ -173,5 +175,136 @@ def put(x, indices, vals, /, *, axis=None, mode="clip"):
173175

174176
vals = dpt.broadcast_to(vals, val_shape)
175177

176-
hev, _ = _put(x, indices, vals, axis, mode, sycl_queue=exec_q)
178+
hev, _ = ti._put(x, indices, vals, axis, mode, sycl_queue=exec_q)
179+
hev.wait()
180+
181+
182+
def extract(condition, arr):
    """extract(condition, arr)

    Returns the elements of an array that satisfies the condition.

    If `condition` is boolean :func:`dpctl.tensor.extract` is
    equivalent to ``arr[condition]``.

    Note that :func:`dpctl.tensor.place` does the opposite of
    :func:`dpctl.tensor.extract`.

    Args:
        condition: usm_ndarray
            An array whose non-zero or True entries indicate the element
            of `arr` to extract.
        arr: usm_ndarray
            Input array of the same shape as `condition`.

    Returns:
        extract: usm_ndarray
            Rank 1 array of values from `arr` where `condition` is True.

    Raises:
        TypeError: if `condition` or `arr` is not a usm_ndarray.
        dpctl.utils.ExecutionPlacementError: if no common execution queue
            can be determined for `condition` and `arr`.
        ValueError: if `condition` and `arr` have different shapes.
    """
    if not isinstance(condition, dpt.usm_ndarray):
        raise TypeError(
            "Expecting dpctl.tensor.usm_ndarray type, " f"got {type(condition)}"
        )
    if not isinstance(arr, dpt.usm_ndarray):
        raise TypeError(
            "Expecting dpctl.tensor.usm_ndarray type, " f"got {type(arr)}"
        )
    exec_q = dpctl.utils.get_execution_queue(
        (
            condition.sycl_queue,
            arr.sycl_queue,
        )
    )
    if exec_q is None:
        # Give the caller an actionable message instead of a bare exception.
        raise dpctl.utils.ExecutionPlacementError(
            "arrays have different associated queues. "
            "Use `Y.to_device(X.device)` to migrate."
        )
    if condition.shape != arr.shape:
        raise ValueError("Arrays are not of the same size")
    return _extract_impl(arr, condition)
223+
224+
225+
def place(arr, mask, vals):
    """place(arr, mask, vals)

    Change elements of an array based on conditional and input values.

    If `mask` is boolean :func:`dpctl.tensor.place` is
    equivalent to ``arr[mask] = vals``.

    Args:
        arr: usm_ndarray
            Array to put data into. Modified in place.
        mask: usm_ndarray
            Boolean mask array. Must have the same shape as `arr`.
        vals: usm_ndarray
            Values to put into `arr`. Only the first N elements are
            used, where N is the number of True values in `mask`. If
            `vals` is smaller than N, it will be repeated, and if
            elements of `arr` are to be masked, this sequence must be
            non-empty. Array `vals` must be one dimensional.
            NOTE(review): repetition of a shorter `vals` is delegated to
            the underlying `_place` kernel -- confirm against its
            implementation.

    Returns:
        None

    Raises:
        TypeError: if `arr`, `mask` or `vals` is not a usm_ndarray.
        dpctl.utils.ExecutionPlacementError: if no common execution queue
            can be determined for the inputs.
        ValueError: if `arr` and `mask` have different shapes, or `vals`
            is not one dimensional.
    """
    if not isinstance(arr, dpt.usm_ndarray):
        raise TypeError(
            "Expecting dpctl.tensor.usm_ndarray type, " f"got {type(arr)}"
        )
    if not isinstance(mask, dpt.usm_ndarray):
        raise TypeError(
            "Expecting dpctl.tensor.usm_ndarray type, " f"got {type(mask)}"
        )
    if not isinstance(vals, dpt.usm_ndarray):
        raise TypeError(
            "Expecting dpctl.tensor.usm_ndarray type, " f"got {type(vals)}"
        )
    exec_q = dpctl.utils.get_execution_queue(
        (
            arr.sycl_queue,
            mask.sycl_queue,
            vals.sycl_queue,
        )
    )
    if exec_q is None:
        # Give the caller an actionable message instead of a bare exception.
        raise dpctl.utils.ExecutionPlacementError(
            "arrays have different associated queues. "
            "Use `Y.to_device(X.device)` to migrate."
        )
    if arr.shape != mask.shape or vals.ndim != 1:
        raise ValueError("Array sizes are not as required")
    cumsum = dpt.empty(mask.size, dtype="i8", sycl_queue=exec_q)
    nz_count = ti.mask_positions(mask, cumsum, sycl_queue=exec_q)
    if nz_count == 0:
        # Nothing is selected by the mask, so there is nothing to write.
        return
    if vals.dtype == arr.dtype:
        rhs = vals
    else:
        rhs = dpt.astype(vals, arr.dtype)
    hev, _ = ti._place(
        dst=arr,
        cumsum=cumsum,
        axis_start=0,
        axis_end=mask.ndim,
        rhs=rhs,
        sycl_queue=exec_q,
    )
    # Block until the host task event completes before returning.
    hev.wait()
285+
286+
287+
def nonzero(arr):
    """nonzero(arr)

    Return the indices of non-zero elements.

    Returns the tuple of usm_ndarrays, one for each dimension
    of `arr`, containing the indices of the non-zero elements
    in that dimension. The values of `arr` are always tested in
    row-major, C-style order.

    Args:
        arr: usm_ndarray
            Input array, which has non-zero array rank.

    Returns:
        tuple_of_usm_ndarrays: tuple
            Indices of non-zero array elements.

    Raises:
        TypeError: if `arr` is not a usm_ndarray.
        ValueError: if `arr` is a zero-dimensional array.
    """
    if not isinstance(arr, dpt.usm_ndarray):
        raise TypeError(
            "Expecting dpctl.tensor.usm_ndarray type, " f"got {type(arr)}"
        )
    if arr.ndim == 0:
        raise ValueError("Array of positive rank is expected")
    return _nonzero_impl(arr)

0 commit comments

Comments
 (0)