diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt index f62ad49123..2818dfa6d3 100644 --- a/docs/CMakeLists.txt +++ b/docs/CMakeLists.txt @@ -23,6 +23,16 @@ if (GIT_FOUND) OUTPUT_VARIABLE CURRENT_RELEASE OUTPUT_STRIP_TRAILING_WHITESPACE ) + set(CURRENT_COMMIT "") + execute_process( + COMMAND ${GIT_EXECUTABLE} describe --tags + RESULT_VARIABLE result + OUTPUT_VARIABLE CURRENT_COMMIT + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + if (NOT "${CURRENT_RELEASE}" STREQUAL "${CURRENT_COMMIT}") + set(CURRENT_RELEASE "master") + endif () endif (GIT_FOUND) set(DOXYGEN_INPUT_DIR ../dpctl-capi) diff --git a/docs/README.md b/docs/README.md index 7c14af8a0c..73bead65f5 100644 --- a/docs/README.md +++ b/docs/README.md @@ -4,10 +4,10 @@ What? Generator scripts for dpCtl API documentation. To run these scripts, follow the following steps: -`mkdir build` -`cd build` -`cmake -DDPCTL_DOCGEN_PREFIX=` -`make Sphinx` +`mkdir build`
+`cd build`
+`cmake -DDPCTL_DOCGEN_PREFIX=<path/to/output/dir>`
+`make Sphinx`
The `DPCTL_DOCGEN_PREFIX` flag is optional and can be omitted to generate the documents in the current source directory in a sub-directory called diff --git a/docs/dpCtl.dptensor_api.rst b/docs/dpCtl.dptensor_api.rst new file mode 100644 index 0000000000..51cebf7c05 --- /dev/null +++ b/docs/dpCtl.dptensor_api.rst @@ -0,0 +1,8 @@ +.. _dpCtl.dptensor_api: + +######################### +dpCtl dptensor Python API +######################### + +.. automodule:: dpctl.dptensor + :members: diff --git a/docs/dpCtl.program_api.rst b/docs/dpCtl.program_api.rst new file mode 100644 index 0000000000..c6163e26c5 --- /dev/null +++ b/docs/dpCtl.program_api.rst @@ -0,0 +1,8 @@ +.. _dpCtl.program_api: + +######################## +dpCtl Program Python API +######################## + +.. automodule:: dpctl.program + :members: diff --git a/docs/index.rst b/docs/index.rst index bf9a77efda..6bc50c3f16 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -19,10 +19,9 @@ Indices and tables * :ref:`search` .. toctree:: - :maxdepth: 2 + :maxdepth: 3 :caption: Contents: self - dpCtl Python API - dpctl.memory Python API + toc_pyapi api/dpCtl-CAPI_root diff --git a/docs/toc_pyapi.rst b/docs/toc_pyapi.rst new file mode 100644 index 0000000000..2750aa41ab --- /dev/null +++ b/docs/toc_pyapi.rst @@ -0,0 +1,10 @@ +Python API +================ + +.. toctree:: + :maxdepth: 1 + + dpctl - SYCL runtime wrapper classes and queue manager + dpctl.memory - USM memory manager + dpctl.dptensor - Data-parallel tensor containers + dpctl.program - Program manager diff --git a/dpctl/__init__.py b/dpctl/__init__.py index d453eeeb45..97277188f5 100644 --- a/dpctl/__init__.py +++ b/dpctl/__init__.py @@ -1,5 +1,4 @@ -# ===---------------- __init__.py - dpctl module -------*- Cython -*--------===# -# +# ===-----------------------------------------------------------------------===# # Data Parallel Control (dpCtl) # # Copyright 2020 Intel Corporation @@ -17,11 +16,7 @@ # limitations under the License. 
# # ===-----------------------------------------------------------------------===# -# -# \file -# The top-level dpctl module. -# -# ===-----------------------------------------------------------------------===# + """ **Data Parallel Control (dpCtl)** @@ -31,8 +26,14 @@ a common runtime to manage specific SYCL resources, such as devices and USM memory, for SYCL-based Python packages and extension modules. - Currently, dpCtl has two main features: a global SYCL queue manager - and a USM memory manager. + The main features presently provided by dpCtl are: + + * A SYCL queue manager exposed directly inside the top-level `dpctl` + module. + * A USM memory manager (`dpctl.memory`) that provides Python objects + implementing the Python buffer protocol using USM shared and USM host + allocators. The memory manager also exposes various utility functions + to wrap SYCL's USM allocators, deallocators, `memcpy` functions, *etc.* """ __author__ = "Intel Corp." diff --git a/dpctl/_sycl_core.pyx b/dpctl/_sycl_core.pyx index 997ec0edef..f163090403 100644 --- a/dpctl/_sycl_core.pyx +++ b/dpctl/_sycl_core.pyx @@ -303,7 +303,7 @@ cdef class SyclQueue: """ @staticmethod - cdef SyclQueue _create (DPCTLSyclQueueRef qref): + cdef SyclQueue _create(DPCTLSyclQueueRef qref): if qref is NULL: raise SyclQueueCreationError("Queue creation failed.") cdef SyclQueue ret = SyclQueue.__new__(SyclQueue) @@ -605,7 +605,7 @@ cdef class _SyclRTManager: cdef dict _backend_enum_ty_dict cdef dict _device_enum_ty_dict - def __cinit__ (self): + def __cinit__(self): self._backend_str_ty_dict = { "opencl" : _backend_type._OPENCL, @@ -627,7 +627,7 @@ cdef class _SyclRTManager: device_type.gpu : _device_type._GPU, } - def _set_as_current_queue (self, backend_ty, device_ty, device_id): + def _set_as_current_queue(self, backend_ty, device_ty, device_id): cdef DPCTLSyclQueueRef queue_ref try : @@ -642,45 +642,47 @@ cdef class _SyclRTManager: raise UnsupportedBackendError("Backend can only be opencl or " "level-0") 
- def _remove_current_queue (self): + def _remove_current_queue(self): DPCTLQueueMgr_PopQueue() - def dump (self): + def dump(self): """ Prints information about the Runtime object. """ DPCTLPlatform_DumpInfo() - def print_available_backends (self): - """ Prints the available backends. + def print_available_backends(self): + """ Prints the available SYCL backends. """ print(self._backend_str_ty_dict.keys()) - cpdef get_current_backend (self): - """ Returns the backend for the current queue as `backend_type` enum + cpdef get_current_backend(self): + """ Returns the backend for the current queue as a `backend_type` enum """ return self.get_current_queue().get_sycl_backend() - cpdef get_current_device_type (self): - """ Returns current device type as `device_type` enum + cpdef get_current_device_type(self): + """ Returns current device type as a `device_type` enum """ return self.get_current_queue().get_sycl_device().get_device_type() - cpdef SyclQueue get_current_queue (self): - """ Returns the activated SYCL queue as a PyCapsule. + cpdef SyclQueue get_current_queue(self): + """ Returns the currently activate SYCL queue as a new SyclQueue object. + If there are no active queues then a SyclQueueCreationError exception is + raised. """ return SyclQueue._create(DPCTLQueueMgr_GetCurrentQueue()) - def get_num_activated_queues (self): - """ Return the number of currently activated queues for this thread. + def get_num_activated_queues(self): + """ Returns the number of currently activated queues for this thread. """ return DPCTLQueueMgr_GetNumActivatedQueues() - def get_num_platforms (self): + def get_num_platforms(self): """ Returns the number of available non-host SYCL platforms. 
""" return DPCTLPlatform_GetNumNonHostPlatforms() - def get_num_queues (self, backend_ty, device_ty): + def get_num_queues(self, backend_ty, device_ty): cdef size_t num = 0 try : beTy = self._backend_enum_ty_dict[backend_ty] @@ -699,7 +701,7 @@ cdef class _SyclRTManager: return num - def has_gpu_queues (self, backend_ty=backend_type.opencl): + def has_gpu_queues(self, backend_ty=backend_type.opencl): cdef size_t num = 0 try : beTy = self._backend_enum_ty_dict[backend_ty] @@ -714,7 +716,7 @@ cdef class _SyclRTManager: else: return False - def has_cpu_queues (self, backend_ty=backend_type.opencl): + def has_cpu_queues(self, backend_ty=backend_type.opencl): cdef size_t num = 0 try : beTy = self._backend_enum_ty_dict[backend_ty] @@ -729,21 +731,21 @@ cdef class _SyclRTManager: else: return False - def has_sycl_platforms (self): + def has_sycl_platforms(self): cdef size_t num_platforms = DPCTLPlatform_GetNumNonHostPlatforms() if num_platforms: return True else: return False - def is_in_device_context (self): + def is_in_device_context(self): cdef size_t num = DPCTLQueueMgr_GetNumActivatedQueues() if num: return True else: return False - def set_default_queue (self, backend_ty, device_ty, device_id): + def set_default_queue(self, backend_ty, device_ty, device_id): cdef DPCTLSyclQueueRef ret try : if isinstance(backend_ty, str): @@ -785,8 +787,17 @@ set_default_queue = _mgr.set_default_queue is_in_device_context = _mgr.is_in_device_context cpdef SyclQueue get_current_queue(): - """ - Obtain current Sycl Queue from Data Parallel Control package. + """ Returns the currently activate SYCL queue as a new SyclQueue object. + + Returns: + SyclQueue: If there is a currently active SYCL queue that queue + is returned wrapped in a SyclQueue object. The SyclQueue object + owns a copy of the currently active SYCL queue as an opaque + `DPCTLSyclQueueRef` pointer. The pointer is freed when the SyclQueue + is garbage collected. 
+ + Raises: + SyclQueueCreationError: If no currently active SYCL queue found. """ return _mgr.get_current_queue() @@ -805,7 +816,7 @@ cpdef get_current_backend(): from contextlib import contextmanager @contextmanager -def device_context (str queue_str="opencl:gpu:0"): +def device_context(str queue_str="opencl:gpu:0"): """ The SYCL queue defined by the "backend:device type:device id" tuple is set as the currently active queue, *i.e.*, a subsequent call to diff --git a/dpctl/dptensor/__init__.py b/dpctl/dptensor/__init__.py index c7695fcd4f..e427b82718 100644 --- a/dpctl/dptensor/__init__.py +++ b/dpctl/dptensor/__init__.py @@ -1 +1,32 @@ +# ===-----------------------------------------------------------------------===# +# Data Parallel Control (dpCtl) +# +# Copyright 2020 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# ===-----------------------------------------------------------------------===# +""" + **Data Parallel Tensor Collection** + + `dpctl.dptensor` is an experimental collection of tensor implementations + that will implement future Python data API (https://data-apis.github.io/array-api/latest/). + + Available tensor implementations: + + * `numpy_usm_shared`: Provides a `numpy.ndarray` sub-class whose \ + underlying memory buffer is allocated with a USM shared memory allocator. 
+ +""" + import dpctl.dptensor.numpy_usm_shared diff --git a/dpctl/dptensor/numpy_usm_shared.py b/dpctl/dptensor/numpy_usm_shared.py index 2c790bc1dd..ff7a5315e4 100644 --- a/dpctl/dptensor/numpy_usm_shared.py +++ b/dpctl/dptensor/numpy_usm_shared.py @@ -190,8 +190,8 @@ def __array_finalize__(self, obj): return # When called in new-from-template, `obj` is another instance of our own # subclass, that we might use to update the new `self` instance. - # However, when called from view casting, `obj` can be an instance of any - # subclass of ndarray, including our own. + # However, when called from view casting, `obj` can be an instance of + # any subclass of ndarray, including our own. if hasattr(obj, array_interface_property): return for ext_checker in ndarray.external_usm_checkers: @@ -204,14 +204,16 @@ def __array_finalize__(self, obj): return ob = ob.base - # Just raise an exception since __array_ufunc__ makes all reasonable cases not - # need the code below. + # Just raise an exception since __array_ufunc__ makes all + # reasonable cases not need the code below. raise ValueError( - "Non-USM allocated ndarray can not viewed as a USM-allocated one without a copy" + "Non-USM allocated ndarray can not viewed as a USM-allocated \ + one without a copy" ) - # Tell Numba to not treat this type just like a NumPy ndarray but to propagate its type. - # This way it will use the custom numpy_usm_shared allocator. + # Tell Numba to not treat this type just like a NumPy ndarray but to + # propagate its type. This way it will use the custom numpy_usm_shared + # allocator. __numba_no_subtype_ndarray__ = True # Convert to a NumPy ndarray. @@ -257,8 +259,8 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): out_as_np = np.ndarray(out.shape, out.dtype, out) kwargs["out"] = out_as_np else: - # If they manually gave numpy_usm_shared as out kwarg then we have to also - # cast as regular NumPy ndarray to avoid recursion. 
+ # If they manually gave numpy_usm_shared as out kwarg then we + # have to also cast as regular NumPy ndarray to avoid recursion. if isinstance(kwargs["out"], ndarray): out = kwargs["out"] kwargs["out"] = np.ndarray(out.shape, out.dtype, out) @@ -282,7 +284,8 @@ def isdef(x): cname = c[0] if isdef(cname): continue - # For now we do the simple thing and copy the types from NumPy module into numpy_usm_shared module. + # For now we do the simple thing and copy the types from NumPy module + # into numpy_usm_shared module. new_func = "%s = np.%s" % (cname, cname) try: the_code = compile(new_func, "__init__", "exec")