From 56adc96ecadf1da028209906240a89ea0b5483a6 Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Mon, 20 Dec 2021 23:53:58 -0600 Subject: [PATCH 01/12] Add a urls.json file to store all urls in docs. - Add a JSON file to store all urls. - Add a new extlinks_gen module to read urls from the urls.json into the extlinks dictionary read by the sphinx.ext.extlinks module. - Use extlinks_gen inside conf.py. - Add license header and other minor changes to conf.py. --- docs/conf.in | 42 +++++++++++++++++++++++++++++++++-------- docs/docfiles/urls.json | 16 ++++++++++++++++ docs/extlinks_gen.py | 36 +++++++++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+), 8 deletions(-) create mode 100644 docs/docfiles/urls.json create mode 100644 docs/extlinks_gen.py diff --git a/docs/conf.in b/docs/conf.in index f3f89602f7..64af63037e 100644 --- a/docs/conf.in +++ b/docs/conf.in @@ -1,3 +1,19 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + #!/usr/bin/env python3 # -*- coding: utf-8 -*- @@ -5,19 +21,23 @@ # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here.
# -# import os -# import sys -# sys.path.insert(0, os.path.abspath('.')) +import os +import sys + from docutils.parsers.rst import directives from sphinx.ext.autosummary import Autosummary, get_documenter from sphinx.util.inspect import safe_getattr import dpctl +sys.path.insert(0, os.path.abspath(".")) + +import extlinks_gen as urlgen + # -- Project information ----------------------------------------------------- project = "Data-parallel Control (dpctl)" -copyright = "2020, Intel Corp." +copyright = "2020-21, Intel Corp." author = "Intel Corp." version = dpctl.__version__.strip(".dirty") @@ -31,13 +51,15 @@ release = dpctl.__version__.strip(".dirty") # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - "sphinx.ext.todo", - "sphinx.ext.coverage", - "sphinx.ext.viewcode", - "sphinx.ext.githubpages", "sphinx.ext.autodoc", "sphinx.ext.autosummary", + "sphinx.ext.coverage", + "sphinx.ext.extlinks", + "sphinx.ext.githubpages", "sphinx.ext.napoleon", + "sphinx.ext.todo", + "sphinx.ext.viewcode", + "sphinxcontrib.programoutput", ] todo_include_todos = True @@ -209,3 +231,7 @@ class AutoAutoSummary(Autosummary): def setup(app): app.add_directive("autoautosummary", AutoAutoSummary) + + +# A dictionary of urls +extlinks = urlgen.create_extlinks() diff --git a/docs/docfiles/urls.json b/docs/docfiles/urls.json new file mode 100644 index 0000000000..3e0906fc41 --- /dev/null +++ b/docs/docfiles/urls.json @@ -0,0 +1,16 @@ +{ + "dpcpp_envar": "https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md", + "numa_domain": "https://en.wikipedia.org/wiki/Non-uniform_memory_access", + "oneapi": "https://www.oneapi.io/", + "oneapi_filter_selection": "https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/FilterSelector/FilterSelector.adoc", + "sycl_aspects": "https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#table.device.aspect", + "sycl_context": 
"https://sycl.readthedocs.io/en/latest/iface/context.html", + "sycl_device": "https://sycl.readthedocs.io/en/latest/iface/device.html", + "sycl_device_info": "https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#_device_information_descriptors", + "sycl_device_selector": "https://sycl.readthedocs.io/en/latest/iface/device-selector.html", + "sycl_event": "https://sycl.readthedocs.io/en/latest/iface/event.html", + "sycl_platform": "https://sycl.readthedocs.io/en/latest/iface/platform.html", + "sycl_queue": "https://sycl.readthedocs.io/en/latest/iface/queue.html", + "sycl_runtime_classes": "https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#_sycl_runtime_classes", + "sycl_spec_2020": "https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html" +} diff --git a/docs/extlinks_gen.py b/docs/extlinks_gen.py new file mode 100644 index 0000000000..caa45a9e94 --- /dev/null +++ b/docs/extlinks_gen.py @@ -0,0 +1,36 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json + + +def create_extlinks(): + """Reads a JSON file to create a dictionary of urls in the format supported + by the sphinx.ext.extlinks extension. + + Returns: + dict: A dictionary that is understood by the extlinks Sphinx extension.
+ + """ + extlinks = {} + + with open("docfiles/urls.json") as urls_json: + urls = json.load(urls_json) + for url in urls: + url_value = urls[url] + extlinks[url] = (url_value + "%s", None) + + return extlinks From c483f3870860aca2c897dad51cba4b1bce402f3d Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Tue, 21 Dec 2021 01:37:51 -0600 Subject: [PATCH 02/12] Sphinxcontrib is needed to show program output. --- .github/workflows/generate-docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/generate-docs.yml b/.github/workflows/generate-docs.yml index 69bd24eaff..3f81a5eadc 100644 --- a/.github/workflows/generate-docs.yml +++ b/.github/workflows/generate-docs.yml @@ -49,7 +49,7 @@ jobs: if: ${{ !github.event.pull_request || github.event.action != 'closed' }} shell: bash -l {0} run: | - pip install numpy cython setuptools sphinx sphinx_rtd_theme pydot graphviz + pip install numpy cython setuptools sphinx sphinx_rtd_theme pydot graphviz sphinxcontrib-programoutput - name: Checkout repo uses: actions/checkout@v2 with: From 330ca847cdbbebb8995fd759d4591c9e46b29922 Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Tue, 21 Dec 2021 01:38:28 -0600 Subject: [PATCH 03/12] Fix hyperlinks in docstrings. --- dpctl/__init__.py | 11 ++++++----- dpctl/_sycl_context.pyx | 2 +- dpctl/_sycl_device.pyx | 4 ++-- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/dpctl/__init__.py b/dpctl/__init__.py index e3ce7907c8..ca4ee9796f 100644 --- a/dpctl/__init__.py +++ b/dpctl/__init__.py @@ -18,11 +18,12 @@ **Data Parallel Control (dpctl)** is a Python abstraction layer over SYCL. Dpctl implements a subset of SYCL's API providing wrappers for the - SYCL runtime classes described in `Section 4.6`_ of the `SYCL 2020 spec`_. - Note that the SYCL ``device_selector`` class is not implemented, instead - there are device selection helper functions that can be used to simulate - the same behavior.
Dpctl implements the ``ONEPI::filter_selector`` extension - that is included in Intel's DPC++ SYCL compiler. + SYCL runtime classes described in :sycl_runtime_classes:`Section 4.6 <>` of + the :sycl_spec_2020:`SYCL 2020 spec <>`. Note that the SYCL + ``device_selector`` class is not implemented, instead there are device + selection helper functions that can be used to simulate the same behavior. + Dpctl implements the ``ONEAPI::filter_selector`` extension that is included + in Intel's DPC++ SYCL compiler. The module also includes a global SYCL queue manager. The queue manager provides convenience functions to create a global instance of diff --git a/dpctl/_sycl_context.pyx b/dpctl/_sycl_context.pyx index 56e0094582..87103d7c8a 100644 --- a/dpctl/_sycl_context.pyx +++ b/dpctl/_sycl_context.pyx @@ -86,7 +86,7 @@ cdef class _SyclContext: cdef class SyclContext(_SyclContext): """ SyclContext(arg=None) - A Python wrapper for the `sycl context`_ C++ class. + A Python wrapper for the :sycl_context:`sycl::context <>` C++ class. There are multiple ways to create a :class:`dpctl.SyclContext` object: diff --git a/dpctl/_sycl_device.pyx b/dpctl/_sycl_device.pyx index c32575b1ae..83b409c46c 100644 --- a/dpctl/_sycl_device.pyx +++ b/dpctl/_sycl_device.pyx @@ -175,13 +175,13 @@ cdef void _init_helper(_SyclDevice device, DPCTLSyclDeviceRef DRef): cdef class SyclDevice(_SyclDevice): """ SyclDevice(arg=None) - Python equivalent for cl::sycl::device class. + A Python wrapper for the :sycl_device:`sycl::device <>` C++ class. There are two ways of creating a SyclDevice instance: - by directly passing in a filter string to the class constructor. The filter string needs to conform to the - `DPC++ filter selector SYCL extension `_. + :oneapi_filter_selection:`DPC++ filter selector SYCL extension <>`. :Example: ..
code-block:: python From 02c94a052039a7f3f7cf316be081f775b3c46ab4 Mon Sep 17 00:00:00 2001 From: oleksandr-pavlyk Date: Tue, 21 Dec 2021 01:49:27 -0600 Subject: [PATCH 04/12] Add a page for the dpctl user manual defining basic SYCL concepts. --- .../manual/dpctl/basic_concepts.rst | 75 +++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 docs/docfiles/user_guides/manual/dpctl/basic_concepts.rst diff --git a/docs/docfiles/user_guides/manual/dpctl/basic_concepts.rst b/docs/docfiles/user_guides/manual/dpctl/basic_concepts.rst new file mode 100644 index 0000000000..fbda045899 --- /dev/null +++ b/docs/docfiles/user_guides/manual/dpctl/basic_concepts.rst @@ -0,0 +1,75 @@ +.. _basic_concepts: + +Basic Concepts +============== + +The section introduces the basic concepts for XPU management used by dpctl. +As dpctl is based on SYCL the concepts should be familiar to users with prior +experience with SYCL. However, users of dpctl need not have any prior experience +with SYCL and the concepts presented here should be self-sufficient. We do not +go into all the SYCL-level details here and if needed readers should refer to a +more topical SYCL reference such as the :sycl_spec_2020:`SYCL 2020 spec <>`. + +* **Heterogeneous computing** + Refers to using multiple devices in a program. + +* **Host** + Every program starts by running on a host, and most of the lines of code in + a program, in particular lines of code implementing the Python interpreter + itself, are usually for the host. Hosts are customarily CPUs. + +* **Device** + A device is an XPU connected to a host that is programmable with a specific + device driver. Different types of devices can have different architectures + (CPUs, GPUs, FPGA, ASICs, DSP), but are programmable using the same + :oneapi:`oneAPI <>` programming model. + +* **Platform** + A device driver installed on the system is termed as a platform. 
As multiple + devices of the same type can share the same device driver, a platform may + contain multiple devices. Note that the same physical hardware (say, a GPU) + may be reflected as two separate devices if they can be programmed by more + than one platform, *e.g.*, the same GPU hardware can be listed as an + OpenCL GPU device and a Level-Zero GPU device. + +* **Context** + A context holds the run-time information needed to operate on a device or a + group of devices from the same platform. Contexts are relatively expensive + to create and should be reused as much as possible. + +* **Queue** + A queue is needed to schedule execution of any computation, or data + copying on the device. Queue construction requires specifying a device + and a context targeting that device as well as additional properties, + such as whether profiling information should be collected or whether submitted + tasks are executed in the order in which they were submitted. + +* **Event** + An event holds information related to computation/data movement operation + scheduled for execution on a queue, such as its execution status as well + as profiling information if the queue the task was submitted to allowed + for collection of such information. Events can be used to specify task + dependencies as well as to synchronize host and devices. + +* **USM** + Unified Shared Memory (USM) refers to pointer based device memory management. + USM allocations are bound to context. In other words, a pointer representing + USM allocation can be unambiguously mapped to the data it represents only + if the associated context is known. USM allocations are accessible by + computational kernels that are executed on a device, provided that the + allocation is bound to the same context that was used to construct the queue + where the kernel was scheduled for execution. + + Depending on the capability of the device, USM allocations can be a "device" + allocation, a "shared" allocation, or a "host" allocation. 
A "device" + allocation is not accessible from host, while "shared" or "host" allocations + are. "Host" allocation refers to an allocation in host memory that is + accessible from a device. + + "Shared" allocations are accessible by both host and device. Runtime manages + synchronization of host's and device's view into shared allocations. Initial + placement of the shared allocations is not defined. + +* **Backend** + Refers to an implementation of :oneapi:`oneAPI <>` programming model exposed + by the underlying runtime. From d0badc74373c3c8881c8023d8e0ee1f530489421 Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Tue, 21 Dec 2021 01:51:12 -0600 Subject: [PATCH 05/12] Add new sections for dpctl user manual. - A page for the explaining device selection. - A page for defining platforms and demonstrating platform querying. --- .../manual/dpctl/device_selection.rst | 150 ++++++++++++++++++ .../user_guides/manual/dpctl/platforms.rst | 35 ++++ 2 files changed, 185 insertions(+) create mode 100644 docs/docfiles/user_guides/manual/dpctl/device_selection.rst create mode 100644 docs/docfiles/user_guides/manual/dpctl/platforms.rst diff --git a/docs/docfiles/user_guides/manual/dpctl/device_selection.rst b/docs/docfiles/user_guides/manual/dpctl/device_selection.rst new file mode 100644 index 0000000000..75aa7cc18f --- /dev/null +++ b/docs/docfiles/user_guides/manual/dpctl/device_selection.rst @@ -0,0 +1,150 @@ +.. _device_selection: + +################ +Device Selection +################ + +Device selection refers to programmatically selecting a single device from +the set of :ref:`devices ` available on the system. + +Selecting a Specific Type of Device +----------------------------------- + +If a user needs to select a specific type of device such as a GPU, they can +directly use one of the helper functions included inside dpctl. Dpctl includes +:ref:`helper functions ` for selecting +a ``host``, a ``cpu``, a ``gpu``, an ``accelerator``, or the ``default`` device. 
+These functions are analogous to SYCL's built-in +:sycl_device_selector:`sycl::device_selector <>` classes. The scoring and +selection of a specific device when multiple devices of the same type are +available on a system is deferred to the underlying SYCL runtime. + +The example :ref:`fig-gpu-device-selection` shows the usage of the +:func:`dpctl.select_gpu_device()` device selection function. In case multiple +GPU devices are available, only one is returned based on the underlying scoring +logic inside the SYCL runtime. If the selection function was unable to select a +device, a ``ValueError`` is raised. + +.. _fig-gpu-device-selection: + +.. literalinclude:: ../../../../../examples/python/device_selection.py + :language: python + :lines: 20-21, 38-52 + :caption: Selecting a GPU Device + :linenos: + +A possible output for the example :ref:`fig-gpu-device-selection` may be: + +.. program-output:: python ../examples/python/device_selection.py -r create_gpu_device + +Selecting a Device Using a Filter String +---------------------------------------- + +Along with using the default device selection functions, a more explicit way of +device selection involves the use of *filter strings* (refer to +:oneapi_filter_selection:`oneAPI filter selection extension <>`). The example +:ref:`fig-gpu-device-selection` also demonstrates the use of a filter string +to create a GPU device directly. Using a filter string allows much more +fine-grained control for selecting a device. The following example +:ref:`fig-filter-selection` demonstrates usages of device selection using filter +strings. + +.. _fig-filter-selection: + +.. literalinclude:: ../../../../../examples/python/filter_selection.py + :language: python + :lines: 20-21, 23-53 + :caption: Device Creation With Filter Strings + :linenos: + +A possible output for the example :ref:`fig-filter-selection` may be: + +..
program-output:: python ../examples/python/filter_selection.py -r select_using_filter + + +It is also possible to pass a list of devices using a filter string. The +example :ref:`fig-adv-device-selection` demonstrates such a use case. The +filter string ``gpu,cpu`` implies that a GPU should be selected if available, +else a CPU device should be selected. + +.. _fig-adv-device-selection: + +.. literalinclude:: ../../../../../examples/python/device_selection.py + :language: python + :lines: 20-21, 55-67 + :caption: Selecting a GPU Device if Available + :linenos: + +A possible output for the example :ref:`fig-adv-device-selection` may be: + +.. program-output:: python ../examples/python/device_selection.py -r create_gpu_device_if_present + +.. Note:: + A **filter string** is a three-tuple that may specify the *backend*, + *device type*, and *device number* as a colon (:) separated string. The + backend specifies the type of device driver and can have a value such as + *host*, *opencl*, *level-zero*, or *cuda*. The device type can be *host*, + *gpu*, *cpu*, or *accelerator*. And, the device number is a numeric value + specifying the ordinality of the device in the listing of devices as + determined by the SYCL runtime. Each of the backend, device type, and device + number value is optional, but at least one of them should be provided, + *e.g.*, ``opencl:gpu:0``, ``gpu:0``, ``gpu``, ``0``, and ``opencl:0`` are + all valid filter strings. + + The device listing including the device number value remains stable for + a given system unless the driver configuration is changed or the SYCL + runtime setting is changed using the ``SYCL_DEVICE_FILTER`` environment + variable. Please refer to + :oneapi_filter_selection:`oneAPI filter selection extension <>` for more + detail. + +Advanced Device Selection +------------------------- + +Till now we have discussed device selection using methods that defer the +selection logic to the SYCL runtime.
However, real-world applications may +require more precise control over device selection. Dpctl offers a way for users +to accomplish more advanced device selection. + +.. _fig-custom-device-selection: + +.. literalinclude:: ../../../../../examples/python/device_selection.py + :language: python + :lines: 20-21, 70-91 + :caption: Custom Device Selection + :linenos: + +The example :ref:`fig-custom-device-selection` shows a way of selecting a device +based off a specific hardware property. The :func:`dpctl.get_devices()` function returns +a list of all *root* devices on the system; out of that list the devices that +support half-precision floating-point arithmetic are selected. Finally, a +"score" computed using the SYCL runtime's default device scoring logic that is +stored in :attr:`dpctl.SyclDevice.default_selector_score` is used to select a +single device. Refer to the documentation of :class:`dpctl.SyclDevice` for a list +of hardware properties that may be used for device selection. + +.. _RootDevice: + +.. Note:: + A **root** device implies an unpartitioned device. A root device can be + partitioned into two or more :ref:`sub-devices <sec-devices-sub-devices>` + based on various criteria. For example, a CPU device with multiple NUMA + domains may be partitioned into multiple sub-devices, each representing a + NUMA domain. + +A convenience function :func:`dpctl.select_device_with_aspects()` is available +that makes it easy to select a device based on a set of specific aspects. The +example :ref:`fig-select-device-with-aspects` selects a device that +supports double precision arithmetic and SYCL USM shared memory allocation. + +.. _fig-select-device-with-aspects: + +.. literalinclude:: ../../../../../examples/python/device_selection.py + :language: python + :lines: 20-21, 94-103 + :caption: Device Selection Using Aspects + :linenos: + +A possible output for the example :ref:`fig-select-device-with-aspects` may be: + +..
program-output:: python ../examples/python/device_selection.py -r create_device_with_aspects diff --git a/docs/docfiles/user_guides/manual/dpctl/platforms.rst b/docs/docfiles/user_guides/manual/dpctl/platforms.rst new file mode 100644 index 0000000000..bf9c0ed981 --- /dev/null +++ b/docs/docfiles/user_guides/manual/dpctl/platforms.rst @@ -0,0 +1,35 @@ +.. _querying_platforms: + +######## +Platform +######## + +A platform abstracts a device driver for one or more XPUs that are connected to +a host. The :class:`dpctl.SyclPlatform` class represents a platform and +abstracts the :sycl_platform:`sycl::platform <>` SYCL runtime class. + +Listing Available Platforms +--------------------------- + +The platforms available on a system can be queried using the +:func:`dpctl.lsplatform` function. In addition, as illustrated in the following +example, it is possible to print out metadata about a platform. + +.. literalinclude:: ../../../../../examples/python/lsplatform.py + :language: python + :lines: 20-41 + :linenos: + +The example can be executed as follows: + +.. code-block:: bash + + python dpctl/examples/python/lsplatform.py -r all + +A possible output for the example may be: + +.. program-output:: python ../examples/python/lsplatform.py -r all + +.. Note:: + The verbosity for the output can be controlled using the ``verbosity`` + keyword argument. Refer to :func:`dpctl.lsplatform`. From ddd5c6a5f463dfb7187c3db4a15773b1bc92fdaa Mon Sep 17 00:00:00 2001 From: oleksandr-pavlyk Date: Tue, 21 Dec 2021 01:52:45 -0600 Subject: [PATCH 06/12] Add a page defining the concept of device to dpctl user manual.
--- .../manual/dpctl/device_selection.rst | 2 + .../user_guides/manual/dpctl/devices.rst | 140 ++++++++++++++++++ 2 files changed, 142 insertions(+) create mode 100644 docs/docfiles/user_guides/manual/dpctl/devices.rst diff --git a/docs/docfiles/user_guides/manual/dpctl/device_selection.rst b/docs/docfiles/user_guides/manual/dpctl/device_selection.rst index 75aa7cc18f..b68f930834 100644 --- a/docs/docfiles/user_guides/manual/dpctl/device_selection.rst +++ b/docs/docfiles/user_guides/manual/dpctl/device_selection.rst @@ -37,6 +37,8 @@ A possible output for the example :ref:`fig-gpu-device-selection` may be: .. program-output:: python ../examples/python/device_selection.py -r create_gpu_device +.. _sec-filter-selection: + Selecting a Device Using a Filter String ---------------------------------------- diff --git a/docs/docfiles/user_guides/manual/dpctl/devices.rst b/docs/docfiles/user_guides/manual/dpctl/devices.rst new file mode 100644 index 0000000000..154a772a9e --- /dev/null +++ b/docs/docfiles/user_guides/manual/dpctl/devices.rst @@ -0,0 +1,140 @@ +.. _devices: + +###### +Device +###### + +A device is an abstract representation for an XPU. The :class:`dpctl.SyclDevice` +class represents a device and is a wrapper over the +:sycl_device:`sycl::device <>` SYCL runtime class. + +Creating Devices +---------------- + +We touched upon device creation under the :ref:`device_selection` section. The +:class:`dpctl.SyclDevice` class includes a default constructor to create a +"default" device that is selected by the SYCL runtime. Users can also use +explicit :ref:`filter selector strings <sec-filter-selection>` to create a +device. + +Listing Devices +--------------- + +:py:mod:`dpctl` provides the :func:`dpctl.get_devices` utility function to list +the available devices on a user's system.
The list of devices returned depends +on available hardware, installed drivers, as well as on +:dpcpp_envar:`environment variables <>` influencing the SYCL runtime, +such as ``SYCL_DEVICE_FILTER`` or ``SYCL_DEVICE_ALLOWLIST``. + +.. _fig-listing-devices: + +.. literalinclude:: ../../../../../examples/python/device_selection.py + :language: python + :lines: 20-22, 107-131 + :caption: Listing Available Devices + :linenos: + +A possible output for the example :ref:`fig-listing-devices` may be: + +.. program-output:: python ../examples/python/device_selection.py -r list_devices + +The example :ref:`fig-listing-devices` demonstrates the usage of +:func:`dpctl.get_devices`. The list can be filtered based on +:class:`dpctl.backend` and :class:`dpctl.device_type`. The 0-based ordinal +position of a device in the output of :func:`dpctl.get_devices` corresponds to +the ``device id`` value in the filter selector string corresponding to the +device. For example, ``"opencl:cpu:0"`` refers to the first device in the list +returned by ``dpctl.get_devices(backend="opencl", device_type="cpu")``. If such +a list is empty, the device construction call ``dpctl.SyclDevice("opencl:cpu:0")`` +will raise a ``ValueError``. + +.. Note:: + + Unless the system configuration changes, the list of devices returned by + :func:`dpctl.get_devices` and the relative ordering of devices in the list + is stable for every call to the function, even across different runs of an + application. + +Device Aspects and Information Descriptors +------------------------------------------ + +A device can have various *aspects* and *information descriptors* that describe +its hardware characteristics. :sycl_aspects:`Aspects <>` are boolean +characteristics of the device, whereas +:sycl_device_info:`information descriptors <>` are non-boolean characteristics +that provide more verbose information about the device.
+:class:`dpctl.SyclDevice` exposes various Python properties that describe a +device's aspects and information descriptors. For example, the property +``has_aspect_fp16`` returns a boolean expression indicating whether a +particular device has aspect ``"fp16"``, indicating whether it supports the +IEEE-754 half-precision floating point type. Whereas, the ``name`` property is +an information descriptor that returns a string with the name of the device. + +.. _fig-available-properties: + +.. code-block:: Python + :caption: Listing Available Device Aspects and Information Descriptors + :linenos: + + import dpctl + import inspect + + def get_properties(cls, prop_name): + "Get name of properties of a class known to have `prop_name`" + known_property_t = type(getattr(cls, prop_name)) + return [n for n, o in inspect.getmembers(cls) if isinstance(o, known_property_t)] + + print(len(get_properties(dpctl.SyclDevice, "name"))) + # Output: 52 + +The example :ref:`fig-available-properties` demonstrates a programmatic way of +listing all the aspects and information descriptor properties in +:class:`dpctl.SyclDevice`. + +.. _sec-devices-sub-devices: + +Sub-devices +----------- + +It is possible for a device to be partitioned into "sub-devices". A sub-device +represents a sub-set of the computational units within a device that are grouped +based on some hardware criteria. For example, a two socket CPU device may be +partitioned into two sub-devices, where each sub-device represents a separate +:numa_domain:`NUMA domain <>`. Depending on the hardware characteristics and +the capabilities of the SYCL runtime, a sub-device may be partitioned further. + +For devices that support partitioning, the +:func:`dpctl.SyclDevice.create_sub_devices` can be used to create a list of +sub-devices. The requested partitioning scheme is indicated with use of the +required ``partition`` keyword. 
Several types of partitioning schemes are +available: + +* **Equal partitioning** + The partitioning scheme is specified as a list of positive integers + indicating a partitioning with each sub-device having the requested number + of parallel compute units. + +* **Affinity partitioning** + The partitioning scheme is specified as a string indicating an affinity + domain used to create sub-devices that share a common resource, such as + certain hardware cache levels. + +.. Note:: + + Use ``partition="next_partitionable"`` to partition along the next level of + architectural hierarchy. + +The following example shows an affinity-based partitioning of a CPU device +into sub-devices based on the available NUMA domains. + +.. _fig-partition-cpu: + +.. literalinclude:: ../../../../../examples/python/subdevices.py + :language: python + :lines: 17, 62-76 + :caption: Partitioning a CPU device + :linenos: + +A possible output for the example :ref:`fig-partition-cpu` may be: + +.. program-output:: python ../examples/python/subdevices.py -r subdivide_by_affinity From cee456fefa06911346b601721163bd68db805a40 Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Tue, 21 Dec 2021 01:53:34 -0600 Subject: [PATCH 07/12] Add some new examples references in docs.
--- examples/python/_runner.py | 2 +- examples/python/device_selection.py | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/examples/python/_runner.py b/examples/python/_runner.py index b26865ec5e..552965e3f0 100644 --- a/examples/python/_runner.py +++ b/examples/python/_runner.py @@ -55,7 +55,7 @@ def run_examples(example_description, glbls_dict): print("Available examples:") print(", ".join(fns)) else: - print("No examples are availble.") + print("No examples are available.") exit(0) if args.run == "all": fns = [] diff --git a/examples/python/device_selection.py b/examples/python/device_selection.py index 373b9e401a..2278183c94 100644 --- a/examples/python/device_selection.py +++ b/examples/python/device_selection.py @@ -103,6 +103,32 @@ def create_device_with_aspects(): dev.print_device_info() +def list_devices(): + """Programmatically get a list of the available devices. + + The list can be filtered based on backend or device_type. + """ + print("Get a list of all devices:\n") + + for d in dpctl.get_devices(): + d.print_device_info() + print("=======================================\n") + + print("Get the list of only OpenCL devices:\n") + + for d in dpctl.get_devices(backend="opencl"): + d.print_device_info() + + print("=======================================\n") + + print("Get all OpenCL CPU devices:\n") + + for d in dpctl.get_devices(backend="opencl", device_type="cpu"): + d.print_device_info() + + print("=======================================\n") + + if __name__ == "__main__": import _runner as runner From 5786f74fa1b597a50e12b7b714150755c126e0b8 Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Tue, 21 Dec 2021 01:54:03 -0600 Subject: [PATCH 08/12] Add an introduction section to dpctl user manual. 
--- .../user_guides/manual/dpctl/intro.rst | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 docs/docfiles/user_guides/manual/dpctl/intro.rst diff --git a/docs/docfiles/user_guides/manual/dpctl/intro.rst b/docs/docfiles/user_guides/manual/dpctl/intro.rst new file mode 100644 index 0000000000..327178919e --- /dev/null +++ b/docs/docfiles/user_guides/manual/dpctl/intro.rst @@ -0,0 +1,38 @@ +.. _intro: + +dpctl +----- + +The Data Parallel Control (dpctl) package provides a Python runtime to access a +data-parallel computing resource or *XPU* from another Python application or +library, alleviating the need for the other Python packages to develop such a +runtime themselves. The term XPU denotes a diverse range of compute +architectures such as a CPU, GPU, FPGA, *etc.*, available to programmers on a +modern heterogeneous system. + +The dpctl runtime is built on top of the C++ SYCL standard and is designed to be +both vendor and architecture agnostic. If the underlying SYCL runtime supports +a type of architecture, the dpctl runtime will allow accessing that architecture +from Python. + +In its current form, dpctl relies on certain DPC++ extensions of SYCL standard. +Moreover, the binary distribution of dpctl uses the proprietary Intel(R) oneAPI +DPC++ runtime bundled as part of oneAPI and supports Intel XPU devices only. +However, dpctl is compatible with the runtime of open-source DPC++ SYCL bundle +that can be compiled to support a wide range of architectures including CUDA, +AMD ROC, and HIP. + +The user guide introduces the core features of dpctl and the underlying +concepts. The guide is meant primarily for users of the Python package. Library +and native extension developers should refer to the programmer's guide. + +Table of contents ++++++++++++++++++ + +.. 
toctree:: + :maxdepth: 2 + + basic_concepts + device_selection + platforms + devices From 23adb091450423f21b13e7ca3667200dd5cb76e2 Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Tue, 21 Dec 2021 01:55:42 -0600 Subject: [PATCH 09/12] Add the new dpctl user manual to tocs on other pages. --- docs/docfiles/user_guides/UserManual.rst | 10 ++++++++++ docs/index_doxyrest.rst.in | 7 ++++++- docs/index_no_doxyrest.rst.in | 6 +++++- 3 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 docs/docfiles/user_guides/UserManual.rst diff --git a/docs/docfiles/user_guides/UserManual.rst b/docs/docfiles/user_guides/UserManual.rst new file mode 100644 index 0000000000..9b955f1b0c --- /dev/null +++ b/docs/docfiles/user_guides/UserManual.rst @@ -0,0 +1,10 @@ +.. _user_manual: + +########### +User Manual +########### + +.. toctree:: + :maxdepth: 3 + + manual/dpctl/intro diff --git a/docs/index_doxyrest.rst.in b/docs/index_doxyrest.rst.in index cc3c17700b..9e064ed047 100644 --- a/docs/index_doxyrest.rst.in +++ b/docs/index_doxyrest.rst.in @@ -1,10 +1,15 @@ .. include:: ./docfiles/intro.rst + +How-to Guides +============= + .. toctree:: :maxdepth: 1 - :caption: User Guides docfiles/user_guides/QuickStart + docfiles/user_guides/UserManual + .. toctree:: :maxdepth: 1 diff --git a/docs/index_no_doxyrest.rst.in b/docs/index_no_doxyrest.rst.in index 1f367ce2d8..ac57d680ae 100644 --- a/docs/index_no_doxyrest.rst.in +++ b/docs/index_no_doxyrest.rst.in @@ -1,10 +1,14 @@ .. include:: ./docfiles/intro.rst +How-to Guides +============= + .. toctree:: :maxdepth: 1 - :caption: User Guides docfiles/user_guides/QuickStart + docfiles/user_guides/UserManual + .. toctree:: :maxdepth: 1 From 5df1242e43c5b35ee2c3e70fc337ba09e25ff6d4 Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Tue, 21 Dec 2021 01:56:22 -0600 Subject: [PATCH 10/12] Fixes and clean ups to existing rst pages. 
--- docs/docfiles/intro.rst | 7 ++-- docs/docfiles/user_guides/QuickStart.rst | 47 +++++++++++------------- 2 files changed, 25 insertions(+), 29 deletions(-) diff --git a/docs/docfiles/intro.rst b/docs/docfiles/intro.rst index 115749b3da..892e66af72 100644 --- a/docs/docfiles/intro.rst +++ b/docs/docfiles/intro.rst @@ -2,10 +2,9 @@ Welcome to Data-parallel Control (dpctl)'s documentation! ========================================================= The data-parallel control (dpctl) library provides C and Python bindings for -`SYCL 2020 `_. -The SYCL 2020 features supported by dpctl are limited to those included by -Intel's DPCPP compiler and specifically cover the SYCL runtime classes described -in `Section 4.6 `_ +:sycl_spec_2020:`SYCL 2020 <>`. The SYCL 2020 features supported by dpctl are +limited to those included by Intel's DPCPP compiler and specifically cover the +SYCL runtime classes described in :sycl_runtime_classes:`Section 4.6 <>` of the SYCL 2020 specification. Apart from the bindings for these runtime classes, dpctl includes bindings for SYCL USM memory allocators and deallocators. Dpctl's Python API provides classes that implement diff --git a/docs/docfiles/user_guides/QuickStart.rst b/docs/docfiles/user_guides/QuickStart.rst index dcfede3bba..37d6097e4b 100644 --- a/docs/docfiles/user_guides/QuickStart.rst +++ b/docs/docfiles/user_guides/QuickStart.rst @@ -4,14 +4,8 @@ Quick Start Guide ################# - -.. contents:: Table of contents - :local: - :backlinks: none - :depth: 3 - Installing from oneAPI ----------------------- +====================== Dpctl is available as part of the oneAPI Intel Distribution of Python (IDP). Please follow `oneAPI installation guide`_ to install oneAPI. In this quick @@ -50,7 +44,7 @@ On Windows `GPU driver installation guide`_. Install Wheel package from Pypi -------------------------------- +=============================== Dpctl can also be istalled from Pypi. 
@@ -79,7 +73,7 @@ On Windows set PATH=\bin;\Library\bin;%PATH% Building from source --------------------- +==================== To build dpctl from source, we need dpcpp and GPU drivers (and optionally CPU OpenCL drivers). It is preferable to use the dpcpp compiler packaged as part of @@ -87,12 +81,13 @@ oneAPI. However, it is possible to use a custom build of dpcpp to build dpctl, especially if you want to enable CUDA support. Building using oneAPI dpcpp -~~~~~~~~~~~~~~~~~~~~~~~~~~~ +--------------------------- As before, oneAPI and graphics drivers should be installed on the system prior to proceeding further. -**Activate oneAPI as follows** +Activate oneAPI as follows +~~~~~~~~~~~~~~~~~~~~~~~~~~ On Linux @@ -106,7 +101,8 @@ On Windows call "%ONEAPI_ROOT%\setvars.bat" -**Build and install using conda-build** +Build and install using conda-build +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The conda-recipe included with the sources can be used to build the dpctl package. The advantage of this approach is that all dependencies are pulled in @@ -136,7 +132,9 @@ After building the conda package you may install it by executing: You could face issues with conda-build version 3.20. Use conda-build 3.18 instead. -**Build and Install with setuptools** + +Build and install with setuptools +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To build using Python ``setuptools``, the following packages should be installed: @@ -164,13 +162,13 @@ to build and install python setup.py develop Building using custom dpcpp -~~~~~~~~~~~~~~~~~~~~~~~~~~~ +--------------------------- It is possible to build dpctl from source using .. _DPC++ toolchain: https://github.com/intel/llvm/blob/sycl/sycl/doc/GetStartedGuide.md instead of the DPC++ compiler that comes with oneAPI. One reason for doing this may be to enable support for CUDA devices. 
-Following steps in :ref:`Build and Install with setuptools` use command line +Following steps in `Build and install with setuptools`_ use command line option :code:`--sycl-compiler-prefix`, for example: .. code-block:: bash @@ -181,7 +179,7 @@ Available options and their descriptions can be retrieved using option :code:`--help`. Using dpctl ------------ +=========== Dpctl requires a DPC++ runtime. When dpctl is installed via conda then it uses the DPC++ runtime from ``dpcpp_cpp_rt`` package that is part of IDP. When using @@ -190,10 +188,10 @@ the system. The easiest way to setup a DPC++ runtime will be by activating oneAPI. Running examples and tests --------------------------- +========================== Running the examples -~~~~~~~~~~~~~~~~~~~~ +-------------------- After setting up dpctl you can try out the Python examples as follows: @@ -213,7 +211,7 @@ located under *examples/cython*. Each example in the folder can be built using examples. Running the Python tests -~~~~~~~~~~~~~~~~~~~~~~~~ +------------------------ The dpctl Python test suite can be executed as follows: @@ -222,14 +220,13 @@ The dpctl Python test suite can be executed as follows: pytest --pyargs dpctl -Building the C API shared library ---------------------------------- +Building the DPCTLSyclInterface library +======================================= -The dpctl C API is a shared library called libDPCTLSyclInterface and is built -together when build the Python package. However, it is possible to only build -the C API as a standalone library. To do so, you will need ``cmake``, +The libDPCTLSyclInterface is a shared library used by the Python package. +To build the library you will need ``DPC++`` toolchain, ``cmake``, ``ninja`` or ``make``, and optionally ``gtest 1.10`` if you wish to run the -C API test suite. +test suite. For example, on Linux the following script can be used to build the C oneAPI library. 
From db5fd7ac58c7ac1f7303ecea3b375f659fd6356c Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Tue, 21 Dec 2021 01:57:19 -0600 Subject: [PATCH 11/12] Remove urls.rst as it is superseded by urls.json. --- docs/docfiles/urls.rst | 11 ----------- docs/generate_rst.py | 14 -------------- 2 files changed, 25 deletions(-) delete mode 100644 docs/docfiles/urls.rst diff --git a/docs/docfiles/urls.rst b/docs/docfiles/urls.rst deleted file mode 100644 index 25bbf743c8..0000000000 --- a/docs/docfiles/urls.rst +++ /dev/null @@ -1,11 +0,0 @@ - -.. _buffer protocol: https://docs.python.org/3/c-api/buffer.html -.. _Data API: https://data-apis.github.io/array-api/latest/ - -.. _Section 4.6: https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#_sycl_runtime_classes -.. _SYCL 2020 spec: https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html -.. _sycl platform: https://sycl.readthedocs.io/en/latest/iface/platform.html -.. _sycl device: https://sycl.readthedocs.io/en/latest/iface/device.html -.. _sycl queue: https://sycl.readthedocs.io/en/latest/iface/queue.html -.. _sycl event: https://sycl.readthedocs.io/en/latest/iface/event.html -.. _sycl context: https://sycl.readthedocs.io/en/latest/iface/context.html diff --git a/docs/generate_rst.py b/docs/generate_rst.py index b2990fef2b..1c80f4bc4a 100644 --- a/docs/generate_rst.py +++ b/docs/generate_rst.py @@ -118,16 +118,6 @@ def _write_underlined(o, s, c): _write_line(o, c * len(s)) -def _write_include_urls(o): - """[summary] - - Args: - o ([type]): [description] - """ - _write_empty_line(o) - _write_line(o, ".. include:: ../urls.rst") - - def _write_hidden_toc(o, list_of_obj_names, prefix_str="", suffix_str=""): """[summary] @@ -368,9 +358,6 @@ def write_rubric(o, indent, rubric_display, rubric_tag, cls_qualname): output, ".. 
autofunction:: " + ".".join([cls_qualname, n]), ) - - _write_include_urls(output) - return output.getvalue() @@ -576,7 +563,6 @@ def _write_function_groups_summary(o, mod, groups): _write_empty_line(output) _write_exceptions_summary_table(output, mod) _write_empty_line(output) - _write_include_urls(output) return output.getvalue() From 94d40791d852f6c1579d4cbba48ded65f964555e Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 23 Dec 2021 07:50:07 -0600 Subject: [PATCH 12/12] Corrected Equal Partition to Count Partitioning --- docs/docfiles/user_guides/manual/dpctl/devices.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/docfiles/user_guides/manual/dpctl/devices.rst b/docs/docfiles/user_guides/manual/dpctl/devices.rst index 154a772a9e..ca2529b0f4 100644 --- a/docs/docfiles/user_guides/manual/dpctl/devices.rst +++ b/docs/docfiles/user_guides/manual/dpctl/devices.rst @@ -109,10 +109,11 @@ sub-devices. The requested partitioning scheme is indicated with use of the required ``partition`` keyword. Several types of partitioning schemes are available: -* **Equal partitioning** +* **Count partitioning** The partitioning scheme is specified as a list of positive integers indicating a partitioning with each sub-device having the requested number - of parallel compute units. + of parallel compute units, or as a single positive integer indicating + an equal-counts partition. * **Affinity partitioning** The partitioning scheme is specified as a string indicating an affinity