Skip to content

Release 0.11.2 #689

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Nov 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,16 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

## [0.11.2] - 11/29/2021

### Added
- Extending `dpctl.device_context` with nested contexts (#678)

### Fixed
- Fixed issue #649 about incorrect behavior of `.T` method on sliced arrays (#653)

## [0.11.1] - 11/10/2021

### Changed
Expand Down
2 changes: 2 additions & 0 deletions dpctl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
get_current_queue,
get_num_activated_queues,
is_in_device_context,
nested_context_factories,
set_global_queue,
)

Expand Down Expand Up @@ -111,6 +112,7 @@
"get_current_queue",
"get_num_activated_queues",
"is_in_device_context",
"nested_context_factories",
"set_global_queue",
]
__all__ += [
Expand Down
38 changes: 36 additions & 2 deletions dpctl/_sycl_queue_manager.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
# cython: linetrace=True

import logging
from contextlib import contextmanager
from contextlib import ExitStack, contextmanager

from .enum_types import backend_type, device_type

Expand Down Expand Up @@ -210,6 +210,22 @@ cpdef get_current_backend():
return _mgr.get_current_backend()


nested_context_factories = []


# Produce the nested contexts for ``ctxt``: one ``factory(ctxt)`` result per
# callable currently registered in ``nested_context_factories``.
def _get_nested_contexts(ctxt):
# Attempt the numba-dppy import first so it has a chance to register its
# factory before the registry is read.
_help_numba_dppy()
# Generator expression: each factory is only called as the result is iterated.
return (factory(ctxt) for factory in nested_context_factories)


def _help_numba_dppy():
"""Import numba-dppy for registering nested contexts"""
# Best-effort optional import: the imported name is not used here, and any
# exception raised by the import (e.g. the package is not installed) is
# deliberately ignored.
try:
import numba_dppy
except Exception:
pass


@contextmanager
def device_context(arg):
"""
Expand All @@ -222,6 +238,9 @@ def device_context(arg):
the context manager's scope. The yielded queue is removed as the currently
usable queue on exiting the context manager.

You can register a context factory in the list of factories
(``dpctl.nested_context_factories``). This context manager uses the
registered context factories to create and activate nested contexts.

Args:

queue_str (str) : A string corresponding to the DPC++ filter selector.
Expand All @@ -243,11 +262,26 @@ def device_context(arg):
with dpctl.device_context("level0:gpu:0"):
pass

The following example registers a nested context factory:

.. code-block:: python

import dpctl

def factory(sycl_queue):
...
return context

dpctl.nested_context_factories.append(factory)

"""
ctxt = None
try:
ctxt = _mgr._set_as_current_queue(arg)
yield ctxt
with ExitStack() as stack:
for nested_context in _get_nested_contexts(ctxt):
stack.enter_context(nested_context)
yield ctxt
finally:
# Code to release resource
if ctxt:
Expand Down
50 changes: 41 additions & 9 deletions dpctl/tensor/_usmarray.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1014,31 +1014,62 @@ cdef usm_ndarray _real_view(usm_ndarray ary):
"""
View into real parts of a complex type array
"""
cdef usm_ndarray r = ary._clone()
cdef int r_typenum_ = -1
cdef usm_ndarray r = None
cdef Py_ssize_t offset_elems = 0

if (ary.typenum_ == UAR_CFLOAT):
r.typenum_ = UAR_FLOAT
r_typenum_ = UAR_FLOAT
elif (ary.typenum_ == UAR_CDOUBLE):
r.typenum_ = UAR_DOUBLE
r_typenum_ = UAR_DOUBLE
else:
raise InternalUSMArrayError(
"_real_view call on array of non-complex type.")

offset_elems = ary.get_offset() * 2
r = usm_ndarray.__new__(
usm_ndarray,
_make_int_tuple(ary.nd_, ary.shape_),
dtype=_make_typestr(r_typenum_),
strides=tuple(2 * si for si in ary.strides),
buffer=ary.base_,
offset=offset_elems,
order=('C' if (ary.flags_ & USM_ARRAY_C_CONTIGUOUS) else 'F')
)
r.flags_ = ary.flags_
r.array_namespace_ = ary.array_namespace_
return r


cdef usm_ndarray _imag_view(usm_ndarray ary):
"""
View into imaginary parts of a complex type array
"""
cdef usm_ndarray r = ary._clone()
cdef int r_typenum_ = -1
cdef usm_ndarray r = None
cdef Py_ssize_t offset_elems = 0

if (ary.typenum_ == UAR_CFLOAT):
r.typenum_ = UAR_FLOAT
r_typenum_ = UAR_FLOAT
elif (ary.typenum_ == UAR_CDOUBLE):
r.typenum_ = UAR_DOUBLE
r_typenum_ = UAR_DOUBLE
else:
raise InternalUSMArrayError(
"_real_view call on array of non-complex type.")
"_imag_view call on array of non-complex type.")

# displace pointer to imaginary part
r.data_ = r.data_ + type_bytesize(r.typenum_)
offset_elems = 2 * ary.get_offset() + 1
r = usm_ndarray.__new__(
usm_ndarray,
_make_int_tuple(ary.nd_, ary.shape_),
dtype=_make_typestr(r_typenum_),
strides=tuple(2 * si for si in ary.strides),
buffer=ary.base_,
offset=offset_elems,
order=('C' if (ary.flags_ & USM_ARRAY_C_CONTIGUOUS) else 'F')
)
r.flags_ = ary.flags_
r.array_namespace_ = ary.array_namespace_
return r


Expand All @@ -1054,7 +1085,8 @@ cdef usm_ndarray _transpose(usm_ndarray ary):
_make_reversed_int_tuple(ary.nd_, ary.strides_)
if (ary.strides_) else None),
buffer=ary.base_,
order=('F' if (ary.flags_ & USM_ARRAY_C_CONTIGUOUS) else 'C')
order=('F' if (ary.flags_ & USM_ARRAY_C_CONTIGUOUS) else 'C'),
offset=ary.get_offset()
)
r.flags_ |= (ary.flags_ & USM_ARRAY_WRITEABLE)
return r
Expand Down
72 changes: 72 additions & 0 deletions dpctl/tests/test_sycl_queue_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
"""Defines unit test cases for the SyclQueueManager class.
"""

import contextlib

import pytest

import dpctl
Expand Down Expand Up @@ -156,3 +158,73 @@ def test_get_current_backend():
dpctl.set_global_queue("gpu")
elif has_cpu():
dpctl.set_global_queue("cpu")


# The public registry ``dpctl.nested_context_factories`` must be a list and
# must be empty when this test runs.
def test_nested_context_factory_is_empty_list():
assert isinstance(dpctl.nested_context_factories, list)
assert not dpctl.nested_context_factories


# Test helper: appends ``factory`` to ``dpctl.nested_context_factories`` on
# entry and removes it again on exit.
@contextlib.contextmanager
def _register_nested_context_factory(factory):
dpctl.nested_context_factories.append(factory)
try:
yield
finally:
# Always unregister, even if the ``with`` body raised, so that the global
# registry does not leak state into other tests.
dpctl.nested_context_factories.remove(factory)


# Checks the registration helper itself: the factory is present in the
# registry while registered, and the registry is an empty list afterwards.
def test_register_nested_context_factory_context():
# Placeholder factory; it is only registered here, never called.
def factory():
pass

with _register_nested_context_factory(factory):
assert factory in dpctl.nested_context_factories

# NOTE(review): presumably these run after the ``with`` block has exited
# (indentation is not visible in this view) -- confirm.
assert isinstance(dpctl.nested_context_factories, list)
assert not dpctl.nested_context_factories


# Checks that ``dpctl.device_context`` calls a registered factory and enters
# the context manager that the factory returns.
@pytest.mark.skipif(not has_cpu(), reason="No OpenCL CPU queues available")
def test_device_context_activates_nested_context():
# Flags mutated by the nested helpers below through ``nonlocal``.
in_context = False
factory_called = False

# Nested context under test: sets ``in_context`` to True while active and
# restores the previous value after the ``yield``.
@contextlib.contextmanager
def context():
nonlocal in_context
old, in_context = in_context, True
yield
in_context = old

# Factory registered with dpctl; records that it was called. Its single
# argument is ignored.
def factory(_):
nonlocal factory_called
factory_called = True
return context()

with _register_nested_context_factory(factory):
# Registering alone must neither call the factory nor enter the context.
assert not factory_called
assert not in_context

with dpctl.device_context("opencl:cpu:0"):
assert factory_called
assert in_context

# NOTE(review): presumably evaluated after ``device_context`` has exited,
# i.e. the nested context was left as well -- confirm nesting level.
assert not in_context


# Checks that a malformed registered factory makes ``dpctl.device_context``
# raise the parametrized exception type with a matching message.
@pytest.mark.skipif(not has_cpu(), reason="No OpenCL CPU queues available")
@pytest.mark.parametrize(
"factory, exception, match",
[
# ``True`` is not callable, so invoking it as a factory fails.
(True, TypeError, "object is not callable"),
# The factory is callable but returns ``None``, which is not a context
# manager.
# NOTE(review): ``ExitStack.enter_context`` is documented to raise
# TypeError instead of AttributeError from Python 3.11 on -- confirm
# this expectation against the supported Python versions.
(lambda x: None, AttributeError, "no attribute '__exit__'"),
],
)
def test_nested_context_factory_exception_if_wrong_factory(
factory, exception, match
):
with pytest.raises(exception, match=match):
with _register_nested_context_factory(factory):
with dpctl.device_context("opencl:cpu:0"):
pass
22 changes: 22 additions & 0 deletions dpctl/tests/test_usm_ndarray_ctor.py
Original file line number Diff line number Diff line change
Expand Up @@ -841,3 +841,25 @@ def test_reshape():
dpt.reshape(Z, Z.shape, order="invalid")
W = dpt.reshape(Z, (-1,), order="C")
assert W.shape == (Z.size,)


# Compares ``.T`` of a usm_ndarray against NumPy's transpose, for the whole
# array and for a slice of it.
def test_transpose():
n, m = 2, 3
X = dpt.usm_ndarray((n, m), "f4")
Xnp = np.arange(n * m, dtype="f4").reshape((n, m))
# Fill the usm_ndarray with the NumPy reference data.
X[:] = Xnp
assert np.array_equal(dpt.to_numpy(X.T), Xnp.T)
# Transpose of a sliced array (the case fixed for issue #649).
assert np.array_equal(dpt.to_numpy(X[1:].T), Xnp[1:].T)


# Compares the ``.real`` and ``.imag`` views of a complex usm_ndarray against
# NumPy, for the whole array and for a slice of it.
def test_real_imag_views():
n, m = 2, 3
X = dpt.usm_ndarray((n, m), "c8")
# Real and imaginary parts use disjoint value ranges, so mixing up the two
# views would be detected.
Xnp_r = np.arange(n * m, dtype="f4").reshape((n, m))
Xnp_i = np.arange(n * m, 2 * n * m, dtype="f4").reshape((n, m))
Xnp = Xnp_r + 1j * Xnp_i
X[:] = Xnp
assert np.array_equal(dpt.to_numpy(X.real), Xnp.real)
assert np.array_equal(dpt.to_numpy(X.imag), Xnp.imag)
# Same checks on a slice of the array.
assert np.array_equal(dpt.to_numpy(X[1:].real), Xnp[1:].real)
assert np.array_equal(dpt.to_numpy(X[1:].imag), Xnp[1:].imag)